d1f2745be8dc45481bb90bd1dd4f491a417cf580
[openwrt/openwrt.git] /
1 From 038efbab4ef95fd22ee6b25027cb6cf25248ea3d Mon Sep 17 00:00:00 2001
2 From: John Cox <john.cox@raspberrypi.com>
3 Date: Thu, 6 Feb 2025 18:02:37 +0000
4 Subject: [PATCH] media: platform: Add Raspberry Pi HEVC decoder driver
5
6 The BCM2711 and BCM2712 SoCs used on Raspberry Pi 4 and Raspberry
7 Pi 5 boards include an HEVC decoder block. Add a driver for it.
8
9 Signed-off-by: John Cox <john.cox@raspberrypi.com>
10 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
11 ---
12 MAINTAINERS | 10 +
13 drivers/media/platform/raspberrypi/Kconfig | 1 +
14 drivers/media/platform/raspberrypi/Makefile | 1 +
15 .../platform/raspberrypi/hevc_dec/Kconfig | 17 +
16 .../platform/raspberrypi/hevc_dec/Makefile | 5 +
17 .../platform/raspberrypi/hevc_dec/hevc_d.c | 450 +++
18 .../platform/raspberrypi/hevc_dec/hevc_d.h | 189 ++
19 .../raspberrypi/hevc_dec/hevc_d_h265.c | 2542 +++++++++++++++++
20 .../raspberrypi/hevc_dec/hevc_d_h265.h | 23 +
21 .../platform/raspberrypi/hevc_dec/hevc_d_hw.c | 376 +++
22 .../platform/raspberrypi/hevc_dec/hevc_d_hw.h | 303 ++
23 .../raspberrypi/hevc_dec/hevc_d_video.c | 688 +++++
24 .../raspberrypi/hevc_dec/hevc_d_video.h | 38 +
25 13 files changed, 4643 insertions(+)
26 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/Kconfig
27 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/Makefile
28 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c
29 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h
30 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c
31 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h
32 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c
33 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h
34 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c
35 create mode 100644 drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h
36
37 --- a/MAINTAINERS
38 +++ b/MAINTAINERS
39 @@ -19359,6 +19359,16 @@ S: Maintained
40 F: Documentation/devicetree/bindings/spi/raspberrypi,rp2040-gpio-bridge.yaml
41 F: drivers/spi/spi-rp2040-gpio-bridge.c
42
43 +RASPBERRY PI HEVC DECODER
44 +M: John Cox <john.cox@raspberrypi.com>
45 +M: Dom Cobley <dom@raspberrypi.com>
46 +M: Dave Stevenson <dave.stevenson@raspberrypi.com>
47 +M: Raspberry Pi Internal Kernel List <kernel-list@raspberrypi.com>
48 +L: linux-media@vger.kernel.org
49 +S: Maintained
50 +F: Documentation/devicetree/bindings/media/raspberrypi,rpi_hevc_dec.yaml
51 +F: drivers/media/platform/raspberrypi/hevc_dec
52 +
53 RASPBERRY PI PISP BACK END
54 M: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
55 L: Raspberry Pi Kernel Maintenance <kernel-list@raspberrypi.com>
56 --- a/drivers/media/platform/raspberrypi/Kconfig
57 +++ b/drivers/media/platform/raspberrypi/Kconfig
58 @@ -2,5 +2,6 @@
59
60 comment "Raspberry Pi media platform drivers"
61
62 +source "drivers/media/platform/raspberrypi/hevc_dec/Kconfig"
63 source "drivers/media/platform/raspberrypi/pisp_be/Kconfig"
64 source "drivers/media/platform/raspberrypi/rp1_cfe/Kconfig"
65 --- a/drivers/media/platform/raspberrypi/Makefile
66 +++ b/drivers/media/platform/raspberrypi/Makefile
67 @@ -1,4 +1,5 @@
68 # SPDX-License-Identifier: GPL-2.0
69
70 +obj-y += hevc_dec/
71 obj-y += pisp_be/
72 obj-y += rp1_cfe/
73 --- /dev/null
74 +++ b/drivers/media/platform/raspberrypi/hevc_dec/Kconfig
75 @@ -0,0 +1,17 @@
76 +# SPDX-License-Identifier: GPL-2.0
77 +
78 +config VIDEO_RPI_HEVC_DEC
79 +	tristate "Raspberry Pi HEVC decoder"
80 +	depends on VIDEO_DEV
81 + depends on OF
82 + select MEDIA_CONTROLLER
83 + select MEDIA_CONTROLLER_REQUEST_API
84 + select VIDEOBUF2_DMA_CONTIG
85 + select V4L2_MEM2MEM_DEV
86 + help
87 + Support for the Raspberry Pi HEVC / H265 H/W decoder as a stateless
88 + V4L2 decoder device.
89 +
90 + To compile this driver as a module, choose M here: the module
91 + will be called rpi-hevc-dec.
92 +
93 --- /dev/null
94 +++ b/drivers/media/platform/raspberrypi/hevc_dec/Makefile
95 @@ -0,0 +1,5 @@
96 +# SPDX-License-Identifier: GPL-2.0
97 +obj-$(CONFIG_VIDEO_RPI_HEVC_DEC) += rpi-hevc-dec.o
98 +
99 +rpi-hevc-dec-y = hevc_d.o hevc_d_video.o hevc_d_hw.o\
100 + hevc_d_h265.o
101 --- /dev/null
102 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c
103 @@ -0,0 +1,450 @@
104 +// SPDX-License-Identifier: GPL-2.0
105 +/*
106 + * Raspberry Pi HEVC driver
107 + *
108 + * Copyright (C) 2024 Raspberry Pi Ltd
109 + *
110 + * Based on the Cedrus VPU driver, that is:
111 + *
112 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
113 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
114 + * Copyright (C) 2018 Bootlin
115 + */
116 +
117 +#include <linux/platform_device.h>
118 +#include <linux/module.h>
119 +#include <linux/of.h>
120 +
121 +#include <media/v4l2-device.h>
122 +#include <media/v4l2-ioctl.h>
123 +#include <media/v4l2-ctrls.h>
124 +#include <media/v4l2-mem2mem.h>
125 +
126 +#include "hevc_d.h"
127 +#include "hevc_d_h265.h"
128 +#include "hevc_d_video.h"
129 +#include "hevc_d_hw.h"
130 +
131 +static const struct hevc_d_control hevc_d_ctrls[] = {
132 + {
133 + .cfg = {
134 + .id = V4L2_CID_STATELESS_HEVC_SPS,
135 + .ops = &hevc_d_hevc_sps_ctrl_ops,
136 + },
137 + .required = false,
138 + }, {
139 + .cfg = {
140 + .id = V4L2_CID_STATELESS_HEVC_PPS,
141 + .ops = &hevc_d_hevc_pps_ctrl_ops,
142 + },
143 + .required = false,
144 + }, {
145 + .cfg = {
146 + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
147 + },
148 + .required = false,
149 + }, {
150 + .cfg = {
151 + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
152 + },
153 + .required = true,
154 + }, {
155 + .cfg = {
156 + .name = "Slice param array",
157 + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
158 + .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS,
159 + .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY,
160 + .dims = { 0x1000 },
161 + },
162 + .required = true,
163 + }, {
164 + .cfg = {
165 + .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
166 + .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
167 + .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
168 + .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
169 + },
170 + .required = false,
171 + }, {
172 + .cfg = {
173 + .id = V4L2_CID_STATELESS_HEVC_START_CODE,
174 + .min = V4L2_STATELESS_HEVC_START_CODE_NONE,
175 + .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
176 + .def = V4L2_STATELESS_HEVC_START_CODE_NONE,
177 + },
178 + .required = false,
179 + },
180 +};
181 +
182 +#define HEVC_D_CTRLS_COUNT ARRAY_SIZE(hevc_d_ctrls)
183 +
184 +struct v4l2_ctrl *hevc_d_find_ctrl(struct hevc_d_ctx *ctx, u32 id)
185 +{
186 + unsigned int i;
187 +
188 + for (i = 0; i < HEVC_D_CTRLS_COUNT; i++)
189 + if (ctx->ctrls[i]->id == id)
190 + return ctx->ctrls[i];
191 +
192 + return NULL;
193 +}
194 +
195 +void *hevc_d_find_control_data(struct hevc_d_ctx *ctx, u32 id)
196 +{
197 + struct v4l2_ctrl *const ctrl = hevc_d_find_ctrl(ctx, id);
198 +
199 + return !ctrl ? NULL : ctrl->p_cur.p;
200 +}
201 +
202 +static int hevc_d_init_ctrls(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx)
203 +{
204 + struct v4l2_ctrl_handler *hdl = &ctx->hdl;
205 + struct v4l2_ctrl *ctrl;
206 + unsigned int i;
207 +
208 + v4l2_ctrl_handler_init(hdl, HEVC_D_CTRLS_COUNT);
209 + if (hdl->error) {
210 + v4l2_err(&dev->v4l2_dev,
211 + "Failed to initialize control handler\n");
212 + return hdl->error;
213 + }
214 +
215 + ctx->ctrls = kzalloc(HEVC_D_CTRLS_COUNT * sizeof(ctrl), GFP_KERNEL);
216 + if (!ctx->ctrls)
217 + return -ENOMEM;
218 +
219 + for (i = 0; i < HEVC_D_CTRLS_COUNT; i++) {
220 + ctrl = v4l2_ctrl_new_custom(hdl, &hevc_d_ctrls[i].cfg,
221 + ctx);
222 + if (hdl->error) {
223 + v4l2_err(&dev->v4l2_dev,
224 + "Failed to create new custom control id=%#x\n",
225 + hevc_d_ctrls[i].cfg.id);
226 +
227 + v4l2_ctrl_handler_free(hdl);
228 + kfree(ctx->ctrls);
229 + return hdl->error;
230 + }
231 +
232 + ctx->ctrls[i] = ctrl;
233 + }
234 +
235 + ctx->fh.ctrl_handler = hdl;
236 + v4l2_ctrl_handler_setup(hdl);
237 +
238 + return 0;
239 +}
240 +
241 +static int hevc_d_request_validate(struct media_request *req)
242 +{
243 + struct media_request_object *obj;
244 + struct v4l2_ctrl_handler *parent_hdl, *hdl;
245 + struct hevc_d_ctx *ctx = NULL;
246 + struct v4l2_ctrl *ctrl_test;
247 + unsigned int count;
248 + unsigned int i;
249 +
250 + list_for_each_entry(obj, &req->objects, list) {
251 + struct vb2_buffer *vb;
252 +
253 + if (vb2_request_object_is_buffer(obj)) {
254 + vb = container_of(obj, struct vb2_buffer, req_obj);
255 + ctx = vb2_get_drv_priv(vb->vb2_queue);
256 +
257 + break;
258 + }
259 + }
260 +
261 + if (!ctx)
262 + return -ENOENT;
263 +
264 + count = vb2_request_buffer_cnt(req);
265 + if (!count) {
266 + v4l2_info(&ctx->dev->v4l2_dev,
267 + "No buffer was provided with the request\n");
268 + return -ENOENT;
269 + } else if (count > 1) {
270 + v4l2_info(&ctx->dev->v4l2_dev,
271 + "More than one buffer was provided with the request\n");
272 + return -EINVAL;
273 + }
274 +
275 + parent_hdl = &ctx->hdl;
276 +
277 + hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
278 + if (!hdl) {
279 + v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n");
280 + return -ENOENT;
281 + }
282 +
283 + for (i = 0; i < HEVC_D_CTRLS_COUNT; i++) {
284 + if (!hevc_d_ctrls[i].required)
285 + continue;
286 +
287 + ctrl_test =
288 + v4l2_ctrl_request_hdl_ctrl_find(hdl,
289 + hevc_d_ctrls[i].cfg.id);
290 + if (!ctrl_test) {
291 + v4l2_info(&ctx->dev->v4l2_dev,
292 + "Missing required codec control %d: id=%#x\n",
293 + i, hevc_d_ctrls[i].cfg.id);
294 + v4l2_ctrl_request_hdl_put(hdl);
295 + return -ENOENT;
296 + }
297 + }
298 +
299 + v4l2_ctrl_request_hdl_put(hdl);
300 +
301 + return vb2_request_validate(req);
302 +}
303 +
304 +static int hevc_d_open(struct file *file)
305 +{
306 + struct hevc_d_dev *dev = video_drvdata(file);
307 + struct hevc_d_ctx *ctx = NULL;
308 + int ret;
309 +
310 + if (mutex_lock_interruptible(&dev->dev_mutex))
311 + return -ERESTARTSYS;
312 +
313 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
314 + if (!ctx) {
315 + mutex_unlock(&dev->dev_mutex);
316 + ret = -ENOMEM;
317 + goto err_unlock;
318 + }
319 +
320 + mutex_init(&ctx->ctx_mutex);
321 +
322 + v4l2_fh_init(&ctx->fh, video_devdata(file));
323 + file->private_data = &ctx->fh;
324 + ctx->dev = dev;
325 +
326 + ret = hevc_d_init_ctrls(dev, ctx);
327 + if (ret)
328 + goto err_free;
329 +
330 + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
331 + &hevc_d_queue_init);
332 + if (IS_ERR(ctx->fh.m2m_ctx)) {
333 + ret = PTR_ERR(ctx->fh.m2m_ctx);
334 + goto err_ctrls;
335 + }
336 +
337 + /* The only bit of format info that we can guess now is H265 src
338 + * Everything else we need more info for
339 + */
340 + hevc_d_prepare_src_format(&ctx->src_fmt);
341 +
342 + v4l2_fh_add(&ctx->fh);
343 +
344 + mutex_unlock(&dev->dev_mutex);
345 +
346 + return 0;
347 +
348 +err_ctrls:
349 + v4l2_ctrl_handler_free(&ctx->hdl);
350 + kfree(ctx->ctrls);
351 +err_free:
352 + mutex_destroy(&ctx->ctx_mutex);
353 + kfree(ctx);
354 +err_unlock:
355 + mutex_unlock(&dev->dev_mutex);
356 +
357 + return ret;
358 +}
359 +
360 +static int hevc_d_release(struct file *file)
361 +{
362 + struct hevc_d_dev *dev = video_drvdata(file);
363 + struct hevc_d_ctx *ctx = container_of(file->private_data,
364 + struct hevc_d_ctx, fh);
365 +
366 + mutex_lock(&dev->dev_mutex);
367 +
368 + v4l2_fh_del(&ctx->fh);
369 + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
370 +
371 + v4l2_ctrl_handler_free(&ctx->hdl);
372 + kfree(ctx->ctrls);
373 +
374 + v4l2_fh_exit(&ctx->fh);
375 + mutex_destroy(&ctx->ctx_mutex);
376 +
377 + kfree(ctx);
378 +
379 + mutex_unlock(&dev->dev_mutex);
380 +
381 + return 0;
382 +}
383 +
384 +static void hevc_d_media_req_queue(struct media_request *req)
385 +{
386 + media_request_mark_manual_completion(req);
387 + v4l2_m2m_request_queue(req);
388 +}
389 +
390 +static const struct v4l2_file_operations hevc_d_fops = {
391 + .owner = THIS_MODULE,
392 + .open = hevc_d_open,
393 + .release = hevc_d_release,
394 + .poll = v4l2_m2m_fop_poll,
395 + .unlocked_ioctl = video_ioctl2,
396 + .mmap = v4l2_m2m_fop_mmap,
397 +};
398 +
399 +static const struct video_device hevc_d_video_device = {
400 + .name = HEVC_D_NAME,
401 + .vfl_dir = VFL_DIR_M2M,
402 + .fops = &hevc_d_fops,
403 + .ioctl_ops = &hevc_d_ioctl_ops,
404 + .minor = -1,
405 + .release = video_device_release_empty,
406 + .device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING,
407 +};
408 +
409 +static const struct v4l2_m2m_ops hevc_d_m2m_ops = {
410 + .device_run = hevc_d_device_run,
411 +};
412 +
413 +static const struct media_device_ops hevc_d_m2m_media_ops = {
414 + .req_validate = hevc_d_request_validate,
415 + .req_queue = hevc_d_media_req_queue,
416 +};
417 +
418 +static int hevc_d_probe(struct platform_device *pdev)
419 +{
420 + struct hevc_d_dev *dev;
421 + struct video_device *vfd;
422 + int ret;
423 +
424 + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
425 + if (!dev)
426 + return -ENOMEM;
427 +
428 + dev->vfd = hevc_d_video_device;
429 + dev->dev = &pdev->dev;
430 + dev->pdev = pdev;
431 +
432 + ret = 0;
433 + ret = hevc_d_hw_probe(dev);
434 + if (ret) {
435 + dev_err(&pdev->dev, "Failed to probe hardware - %d\n", ret);
436 + return ret;
437 + }
438 +
439 + mutex_init(&dev->dev_mutex);
440 +
441 + ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
442 + if (ret) {
443 + dev_err(&pdev->dev, "Failed to register V4L2 device\n");
444 + return ret;
445 + }
446 +
447 + vfd = &dev->vfd;
448 + vfd->lock = &dev->dev_mutex;
449 + vfd->v4l2_dev = &dev->v4l2_dev;
450 +
451 + snprintf(vfd->name, sizeof(vfd->name), "%s", hevc_d_video_device.name);
452 + video_set_drvdata(vfd, dev);
453 +
454 + ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(36));
455 + if (ret) {
456 + v4l2_err(&dev->v4l2_dev,
457 + "Failed dma_set_mask_and_coherent\n");
458 + goto err_v4l2;
459 + }
460 +
461 + dev->m2m_dev = v4l2_m2m_init(&hevc_d_m2m_ops);
462 + if (IS_ERR(dev->m2m_dev)) {
463 + v4l2_err(&dev->v4l2_dev,
464 + "Failed to initialize V4L2 M2M device\n");
465 + ret = PTR_ERR(dev->m2m_dev);
466 +
467 + goto err_v4l2;
468 + }
469 +
470 + dev->mdev.dev = &pdev->dev;
471 + strscpy(dev->mdev.model, HEVC_D_NAME, sizeof(dev->mdev.model));
472 + strscpy(dev->mdev.bus_info, "platform:" HEVC_D_NAME,
473 + sizeof(dev->mdev.bus_info));
474 +
475 + media_device_init(&dev->mdev);
476 + dev->mdev.ops = &hevc_d_m2m_media_ops;
477 + dev->v4l2_dev.mdev = &dev->mdev;
478 +
479 + ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
480 + if (ret) {
481 + v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
482 + goto err_m2m;
483 + }
484 +
485 + v4l2_info(&dev->v4l2_dev,
486 + "Device registered as /dev/video%d\n", vfd->num);
487 +
488 + ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd,
489 + MEDIA_ENT_F_PROC_VIDEO_DECODER);
490 + if (ret) {
491 + v4l2_err(&dev->v4l2_dev,
492 + "Failed to initialize V4L2 M2M media controller\n");
493 + goto err_video;
494 + }
495 +
496 + ret = media_device_register(&dev->mdev);
497 + if (ret) {
498 + v4l2_err(&dev->v4l2_dev, "Failed to register media device\n");
499 + goto err_m2m_mc;
500 + }
501 +
502 + platform_set_drvdata(pdev, dev);
503 +
504 + return 0;
505 +
506 +err_m2m_mc:
507 + v4l2_m2m_unregister_media_controller(dev->m2m_dev);
508 +err_video:
509 + video_unregister_device(&dev->vfd);
510 +err_m2m:
511 + v4l2_m2m_release(dev->m2m_dev);
512 +err_v4l2:
513 + v4l2_device_unregister(&dev->v4l2_dev);
514 +
515 + return ret;
516 +}
517 +
518 +static void hevc_d_remove(struct platform_device *pdev)
519 +{
520 + struct hevc_d_dev *dev = platform_get_drvdata(pdev);
521 +
522 + if (media_devnode_is_registered(dev->mdev.devnode)) {
523 + media_device_unregister(&dev->mdev);
524 + v4l2_m2m_unregister_media_controller(dev->m2m_dev);
525 + media_device_cleanup(&dev->mdev);
526 + }
527 +
528 + v4l2_m2m_release(dev->m2m_dev);
529 + video_unregister_device(&dev->vfd);
530 + v4l2_device_unregister(&dev->v4l2_dev);
531 +
532 + hevc_d_hw_remove(dev);
533 +}
534 +
535 +static const struct of_device_id hevc_d_dt_match[] = {
536 + { .compatible = "raspberrypi,hevc-dec", },
537 + { /* sentinel */ }
538 +};
539 +MODULE_DEVICE_TABLE(of, hevc_d_dt_match);
540 +
541 +static struct platform_driver hevc_d_driver = {
542 + .probe = hevc_d_probe,
543 + .remove = hevc_d_remove,
544 + .driver = {
545 + .name = HEVC_D_NAME,
546 + .of_match_table = of_match_ptr(hevc_d_dt_match),
547 + },
548 +};
549 +module_platform_driver(hevc_d_driver);
550 +
551 +MODULE_LICENSE("GPL");
552 +MODULE_AUTHOR("John Cox <john.cox@raspberrypi.com>");
553 +MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver");
554 --- /dev/null
555 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h
556 @@ -0,0 +1,189 @@
557 +/* SPDX-License-Identifier: GPL-2.0 */
558 +/*
559 + * Raspberry Pi HEVC driver
560 + *
561 + * Copyright (C) 2024 Raspberry Pi Ltd
562 + *
563 + * Based on the Cedrus VPU driver, that is:
564 + *
565 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
566 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
567 + * Copyright (C) 2018 Bootlin
568 + */
569 +
570 +#ifndef _HEVC_D_H_
571 +#define _HEVC_D_H_
572 +
573 +#include <linux/clk.h>
574 +#include <linux/platform_device.h>
575 +#include <media/v4l2-ctrls.h>
576 +#include <media/v4l2-device.h>
577 +#include <media/v4l2-mem2mem.h>
578 +#include <media/videobuf2-v4l2.h>
579 +#include <media/videobuf2-dma-contig.h>
580 +
581 +#define HEVC_D_DEC_ENV_COUNT 6
582 +#define HEVC_D_P1BUF_COUNT 3
583 +#define HEVC_D_P2BUF_COUNT 3
584 +
585 +#define HEVC_D_NAME "rpi-hevc-dec"
586 +
587 +#define HEVC_D_CAPABILITY_UNTILED BIT(0)
588 +#define HEVC_D_CAPABILITY_H265_DEC BIT(1)
589 +
590 +#define HEVC_D_QUIRK_NO_DMA_OFFSET BIT(0)
591 +
592 +enum hevc_d_irq_status {
593 + HEVC_D_IRQ_NONE,
594 + HEVC_D_IRQ_ERROR,
595 + HEVC_D_IRQ_OK,
596 +};
597 +
598 +struct hevc_d_control {
599 + struct v4l2_ctrl_config cfg;
600 + unsigned char required:1;
601 +};
602 +
603 +struct hevc_d_h265_run {
604 + u32 slice_ents;
605 + const struct v4l2_ctrl_hevc_sps *sps;
606 + const struct v4l2_ctrl_hevc_pps *pps;
607 + const struct v4l2_ctrl_hevc_decode_params *dec;
608 + const struct v4l2_ctrl_hevc_slice_params *slice_params;
609 + const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix;
610 +};
611 +
612 +struct hevc_d_run {
613 + struct vb2_v4l2_buffer *src;
614 + struct vb2_v4l2_buffer *dst;
615 +
616 + struct hevc_d_h265_run h265;
617 +};
618 +
619 +struct hevc_d_buffer {
620 + struct v4l2_m2m_buffer m2m_buf;
621 +};
622 +
623 +struct hevc_d_dec_state;
624 +struct hevc_d_dec_env;
625 +
626 +struct hevc_d_gptr {
627 + size_t size;
628 + __u8 *ptr;
629 + dma_addr_t addr;
630 + unsigned long attrs;
631 +};
632 +
633 +struct hevc_d_dev;
634 +typedef void (*hevc_d_irq_callback)(struct hevc_d_dev *dev, void *ctx);
635 +
636 +struct hevc_d_q_aux;
637 +#define HEVC_D_AUX_ENT_COUNT VB2_MAX_FRAME
638 +
639 +struct hevc_d_ctx {
640 + struct v4l2_fh fh;
641 + struct hevc_d_dev *dev;
642 +
643 + struct v4l2_pix_format_mplane src_fmt;
644 + struct v4l2_pix_format_mplane dst_fmt;
645 + int dst_fmt_set;
646 +
647 + int src_stream_on;
648 + int dst_stream_on;
649 +
650 + /*
651 + * fatal_err is set if an error has occurred s.t. decode cannot
652 + * continue (such as running out of CMA)
653 + */
654 + int fatal_err;
655 +
656 + /* Lock for queue operations */
657 + struct mutex ctx_mutex;
658 +
659 + struct v4l2_ctrl_handler hdl;
660 + struct v4l2_ctrl **ctrls;
661 +
662 + /*
663 + * state contains stuff that is only needed in phase0
664 + * it could be held in dec_env but that would be wasteful
665 + */
666 + struct hevc_d_dec_state *state;
667 + struct hevc_d_dec_env *dec0;
668 +
669 + /* Spinlock protecting dec_free */
670 + spinlock_t dec_lock;
671 + struct hevc_d_dec_env *dec_free;
672 +
673 + struct hevc_d_dec_env *dec_pool;
674 +
675 + unsigned int p1idx;
676 + atomic_t p1out;
677 +
678 + unsigned int p2idx;
679 + struct hevc_d_gptr pu_bufs[HEVC_D_P2BUF_COUNT];
680 + struct hevc_d_gptr coeff_bufs[HEVC_D_P2BUF_COUNT];
681 +
682 + /* Spinlock protecting aux_free */
683 + spinlock_t aux_lock;
684 + struct hevc_d_q_aux *aux_free;
685 +
686 + struct hevc_d_q_aux *aux_ents[HEVC_D_AUX_ENT_COUNT];
687 +
688 + unsigned int colmv_stride;
689 + unsigned int colmv_picsize;
690 +};
691 +
692 +struct hevc_d_variant {
693 + unsigned int capabilities;
694 + unsigned int quirks;
695 + unsigned int mod_rate;
696 +};
697 +
698 +struct hevc_d_hw_irq_ent;
699 +
700 +#define HEVC_D_ICTL_ENABLE_UNLIMITED (-1)
701 +
702 +struct hevc_d_hw_irq_ctrl {
703 + /* Spinlock protecting claim and tail */
704 + spinlock_t lock;
705 + struct hevc_d_hw_irq_ent *claim;
706 + struct hevc_d_hw_irq_ent *tail;
707 +
708 + /* Ent for pending irq - also prevents sched */
709 + struct hevc_d_hw_irq_ent *irq;
710 + /* Non-zero => do not start a new job - outer layer sched pending */
711 + int no_sched;
712 + /* Enable count. -1 always OK, 0 do not sched, +ve shed & count down */
713 + int enable;
714 + /* Thread CB requested */
715 + bool thread_reqed;
716 +};
717 +
718 +struct hevc_d_dev {
719 + struct v4l2_device v4l2_dev;
720 + struct video_device vfd;
721 + struct media_device mdev;
722 + struct media_pad pad[2];
723 + struct platform_device *pdev;
724 + struct device *dev;
725 + struct v4l2_m2m_dev *m2m_dev;
726 +
727 + /* Device file mutex */
728 + struct mutex dev_mutex;
729 +
730 + void __iomem *base_irq;
731 + void __iomem *base_h265;
732 +
733 + struct clk *clock;
734 + unsigned long max_clock_rate;
735 +
736 + int cache_align;
737 +
738 + struct hevc_d_hw_irq_ctrl ic_active1;
739 + struct hevc_d_hw_irq_ctrl ic_active2;
740 +};
741 +
742 +struct v4l2_ctrl *hevc_d_find_ctrl(struct hevc_d_ctx *ctx, u32 id);
743 +void *hevc_d_find_control_data(struct hevc_d_ctx *ctx, u32 id);
744 +
745 +#endif
746 --- /dev/null
747 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c
748 @@ -0,0 +1,2542 @@
749 +// SPDX-License-Identifier: GPL-2.0-or-later
750 +/*
751 + * Raspberry Pi HEVC driver
752 + *
753 + * Copyright (C) 2020 Raspberry Pi Ltd
754 + *
755 + * Based on the Cedrus VPU driver, that is:
756 + *
757 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
758 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
759 + * Copyright (C) 2018 Bootlin
760 + */
761 +
762 +#include <linux/delay.h>
763 +#include <linux/types.h>
764 +
765 +#include <media/videobuf2-dma-contig.h>
766 +
767 +#include "hevc_d.h"
768 +#include "hevc_d_h265.h"
769 +#include "hevc_d_hw.h"
770 +#include "hevc_d_video.h"
771 +
772 +enum hevc_slice_type {
773 + HEVC_SLICE_B = 0,
774 + HEVC_SLICE_P = 1,
775 + HEVC_SLICE_I = 2,
776 +};
777 +
778 +enum hevc_layer { L0 = 0, L1 = 1 };
779 +
780 +static int gptr_alloc(struct hevc_d_dev *const dev, struct hevc_d_gptr *gptr,
781 + size_t size, unsigned long attrs)
782 +{
783 + gptr->size = size;
784 + gptr->attrs = attrs;
785 + gptr->addr = 0;
786 + gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, &gptr->addr,
787 + GFP_KERNEL, gptr->attrs);
788 + return !gptr->ptr ? -ENOMEM : 0;
789 +}
790 +
791 +static void gptr_free(struct hevc_d_dev *const dev,
792 + struct hevc_d_gptr *const gptr)
793 +{
794 + if (gptr->ptr)
795 + dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr,
796 + gptr->attrs);
797 + gptr->size = 0;
798 + gptr->ptr = NULL;
799 + gptr->addr = 0;
800 + gptr->attrs = 0;
801 +}
802 +
803 +/* Realloc but do not copy
804 + *
805 + * Frees then allocs.
806 + * If the alloc fails then it attempts to re-allocate the old size
807 + * On error then check gptr->ptr to determine if anything is currently
808 + * allocated.
809 + */
810 +static int gptr_realloc_new(struct hevc_d_dev * const dev,
811 + struct hevc_d_gptr * const gptr, size_t size)
812 +{
813 + const size_t old_size = gptr->size;
814 +
815 + if (size == gptr->size)
816 + return 0;
817 +
818 + if (gptr->ptr)
819 + dma_free_attrs(dev->dev, gptr->size, gptr->ptr,
820 + gptr->addr, gptr->attrs);
821 +
822 + gptr->addr = 0;
823 + gptr->size = size;
824 + gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
825 + &gptr->addr, GFP_KERNEL, gptr->attrs);
826 +
827 + if (!gptr->ptr) {
828 + gptr->addr = 0;
829 + gptr->size = old_size;
830 + gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
831 + &gptr->addr, GFP_KERNEL, gptr->attrs);
832 + if (!gptr->ptr) {
833 + gptr->size = 0;
834 + gptr->addr = 0;
835 + gptr->attrs = 0;
836 + }
837 + return -ENOMEM;
838 + }
839 +
840 + return 0;
841 +}
842 +
843 +static size_t next_size(const size_t x)
844 +{
845 + return hevc_d_round_up_size(x + 1);
846 +}
847 +
848 +#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
849 +
850 +#define AXI_BASE64 0
851 +
852 +#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0))
853 +#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6))
854 +
855 +#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
856 +
857 +struct rpi_cmd {
858 + u32 addr;
859 + u32 data;
860 +} __packed;
861 +
862 +struct hevc_d_q_aux {
863 + unsigned int refcount;
864 + unsigned int q_index;
865 + struct hevc_d_q_aux *next;
866 + struct hevc_d_gptr col;
867 +};
868 +
869 +enum hevc_d_decode_state {
870 + HEVC_D_DECODE_SLICE_START,
871 + HEVC_D_DECODE_ERROR_DONE,
872 + HEVC_D_DECODE_PHASE1,
873 + HEVC_D_DECODE_END,
874 +};
875 +
876 +struct hevc_d_dec_env {
877 + struct hevc_d_ctx *ctx;
878 + struct hevc_d_dec_env *next;
879 +
880 + enum hevc_d_decode_state state;
881 + unsigned int decode_order;
882 + int p1_status; /* P1 status - what to realloc */
883 +
884 + struct rpi_cmd *cmd_fifo;
885 + unsigned int cmd_len, cmd_max;
886 + unsigned int num_slice_msgs;
887 + unsigned int pic_width_in_ctbs_y;
888 + unsigned int pic_height_in_ctbs_y;
889 + unsigned int dpbno_col;
890 + u32 reg_slicestart;
891 + int collocated_from_l0_flag;
892 + /*
893 + * Last CTB/Tile X,Y processed by (wpp_)entry_point
894 + * Could be in _state as P0 only but needs updating where _state
895 + * is const
896 + */
897 + unsigned int entry_ctb_x;
898 + unsigned int entry_ctb_y;
899 + unsigned int entry_tile_x;
900 + unsigned int entry_tile_y;
901 + unsigned int entry_qp;
902 + u32 entry_slice;
903 +
904 + u32 rpi_config2;
905 + u32 rpi_framesize;
906 + u32 rpi_currpoc;
907 +
908 + struct vb2_v4l2_buffer *frame_buf;
909 + struct vb2_v4l2_buffer *src_buf;
910 + dma_addr_t frame_luma_addr;
911 + unsigned int luma_stride;
912 + dma_addr_t frame_chroma_addr;
913 + unsigned int chroma_stride;
914 + dma_addr_t ref_addrs[16][2];
915 + struct hevc_d_q_aux *frame_aux;
916 + struct hevc_d_q_aux *col_aux;
917 +
918 + dma_addr_t cmd_addr;
919 + size_t cmd_size;
920 +
921 + dma_addr_t pu_base_vc;
922 + dma_addr_t coeff_base_vc;
923 + u32 pu_stride;
924 + u32 coeff_stride;
925 +
926 +#define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3)
927 + u16 slice_msgs[SLICE_MSGS_MAX];
928 + u8 scaling_factors[NUM_SCALING_FACTORS];
929 +
930 + struct media_request *req_pin;
931 + struct hevc_d_hw_irq_ent irq_ent;
932 +};
933 +
934 +struct hevc_d_dec_state {
935 + struct v4l2_ctrl_hevc_sps sps;
936 + struct v4l2_ctrl_hevc_pps pps;
937 +
938 + /* Helper vars & tables derived from sps/pps */
939 + unsigned int log2_ctb_size; /* log2 width of a CTB */
940 + unsigned int ctb_width; /* Width in CTBs */
941 + unsigned int ctb_height; /* Height in CTBs */
942 + unsigned int ctb_size; /* Pic area in CTBs */
943 + unsigned int tile_width; /* Width in tiles */
944 + unsigned int tile_height; /* Height in tiles */
945 +
946 + int *col_bd;
947 + int *row_bd;
948 + int *ctb_addr_rs_to_ts;
949 + int *ctb_addr_ts_to_rs;
950 +
951 +	/* Aux storage for DPB */
952 + struct hevc_d_q_aux *ref_aux[HEVC_MAX_REFS];
953 + struct hevc_d_q_aux *frame_aux;
954 +
955 + /* Slice vars */
956 + unsigned int slice_idx;
957 + bool slice_temporal_mvp; /* Slice flag but constant for frame */
958 + bool use_aux;
959 + bool mk_aux;
960 +
961 + /* Temp vars per run - don't actually need to persist */
962 + dma_addr_t src_addr;
963 + const struct v4l2_ctrl_hevc_slice_params *sh;
964 + const struct v4l2_ctrl_hevc_decode_params *dec;
965 + unsigned int nb_refs[2];
966 + unsigned int slice_qp;
967 + unsigned int max_num_merge_cand; // 0 if I-slice
968 + bool dependent_slice_segment_flag;
969 +
970 + unsigned int start_ts; /* slice_segment_addr -> ts */
971 + unsigned int start_ctb_x; /* CTB X,Y of start_ts */
972 + unsigned int start_ctb_y;
973 + unsigned int prev_ctb_x; /* CTB X,Y of start_ts - 1 */
974 + unsigned int prev_ctb_y;
975 +};
976 +
977 +static inline int clip_int(const int x, const int lo, const int hi)
978 +{
979 + return x < lo ? lo : x > hi ? hi : x;
980 +}
981 +
982 +/* Phase 1 command and bit FIFOs */
983 +static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
984 +{
985 + struct rpi_cmd *a;
986 + unsigned int newmax;
987 +
988 + if (n > 0x100000) {
989 + v4l2_err(&de->ctx->dev->v4l2_dev,
990 + "%s: n %u implausible\n", __func__, n);
991 + return -ENOMEM;
992 + }
993 +
994 + if (de->cmd_len + n <= de->cmd_max)
995 + return 0;
996 +
997 + newmax = roundup_pow_of_two(de->cmd_len + n);
998 +
999 + a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd),
1000 + GFP_KERNEL);
1001 + if (!a) {
1002 + v4l2_err(&de->ctx->dev->v4l2_dev,
1003 + "Failed cmd buffer realloc from %u to %u\n",
1004 + de->cmd_max, newmax);
1005 + return -ENOMEM;
1006 + }
1007 + v4l2_info(&de->ctx->dev->v4l2_dev,
1008 + "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax);
1009 +
1010 + de->cmd_fifo = a;
1011 + de->cmd_max = newmax;
1012 + return 0;
1013 +}
1014 +
1015 +// ???? u16 addr - put in u32
1016 +static void p1_apb_write(struct hevc_d_dec_env *const de, const u16 addr,
1017 + const u32 data)
1018 +{
1019 + if (de->cmd_len >= de->cmd_max) {
1020 + v4l2_err(&de->ctx->dev->v4l2_dev,
1021 + "%s: Overflow @ %d\n", __func__, de->cmd_len);
1022 + return;
1023 + }
1024 +
1025 + de->cmd_fifo[de->cmd_len].addr = addr;
1026 + de->cmd_fifo[de->cmd_len].data = data;
1027 +
1028 + de->cmd_len++;
1029 +}
1030 +
1031 +static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
1032 +{
1033 + int i;
1034 +
1035 + for (i = 1; ctb >= bd[i]; i++)
1036 + ; /* bd[] has num+1 elements; bd[0]=0; */
1037 +
1038 + return i - 1;
1039 +}
1040 +
1041 +static unsigned int ctb_to_tile_x(const struct hevc_d_dec_state *const s,
1042 + const unsigned int ctb_x)
1043 +{
1044 + return ctb_to_tile(ctb_x, s->col_bd, s->tile_width);
1045 +}
1046 +
1047 +static unsigned int ctb_to_tile_y(const struct hevc_d_dec_state *const s,
1048 + const unsigned int ctb_y)
1049 +{
1050 + return ctb_to_tile(ctb_y, s->row_bd, s->tile_height);
1051 +}
1052 +
1053 +static void aux_q_free(struct hevc_d_ctx *const ctx,
1054 + struct hevc_d_q_aux *const aq)
1055 +{
1056 + struct hevc_d_dev *const dev = ctx->dev;
1057 +
1058 + gptr_free(dev, &aq->col);
1059 + kfree(aq);
1060 +}
1061 +
1062 +static struct hevc_d_q_aux *aux_q_alloc(struct hevc_d_ctx *const ctx,
1063 + const unsigned int q_index)
1064 +{
1065 + struct hevc_d_dev *const dev = ctx->dev;
1066 + struct hevc_d_q_aux *const aq = kzalloc(sizeof(*aq), GFP_KERNEL);
1067 +
1068 + if (!aq)
1069 + return NULL;
1070 +
1071 + if (gptr_alloc(dev, &aq->col, ctx->colmv_picsize,
1072 + DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING))
1073 + goto fail;
1074 +
1075 + /*
1076 + * Spinlock not required as called in P0 only and
1077 + * aux checks done by _new
1078 + */
1079 + aq->refcount = 1;
1080 + aq->q_index = q_index;
1081 + ctx->aux_ents[q_index] = aq;
1082 + return aq;
1083 +
1084 +fail:
1085 + kfree(aq);
1086 + return NULL;
1087 +}
1088 +
     +/*
     + * Obtain a referenced aux entry for slot @q_index: reuse the entry
     + * already bound to the slot, else recycle one from the free list, else
     + * allocate a new one (outside the lock - see aux_q_alloc()).
     + */
1089 +static struct hevc_d_q_aux *aux_q_new(struct hevc_d_ctx *const ctx,
1090 +				      const unsigned int q_index)
1091 +{
1092 +	struct hevc_d_q_aux *aq;
1093 +	unsigned long lockflags;
1094 +
1095 +	spin_lock_irqsave(&ctx->aux_lock, lockflags);
1096 +	/*
1097 +	 * If we already have this allocated to a slot then use that
1098 +	 * and assume that it will all work itself out in the pipeline
1099 +	 */
1100 +	aq = ctx->aux_ents[q_index];
1101 +	if (aq) {
1102 +		++aq->refcount;
1103 +	} else {
1104 +		aq = ctx->aux_free;
1105 +		if (aq) {
1106 +			ctx->aux_free = aq->next;
1107 +			aq->next = NULL;
1108 +			aq->refcount = 1;
1109 +			aq->q_index = q_index;
1110 +			ctx->aux_ents[q_index] = aq;
1111 +		}
1112 +	}
1113 +	spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1114 +
1115 +	if (!aq)
1116 +		aq = aux_q_alloc(ctx, q_index);
1117 +
1118 +	return aq;
1119 +}
1120 +
     +/* Take a reference on the aux entry bound to @q_index; NULL if the slot
     + * has no entry */
1121 +static struct hevc_d_q_aux *aux_q_ref_idx(struct hevc_d_ctx *const ctx,
1122 +					  const int q_index)
1123 +{
1124 +	unsigned long lockflags;
1125 +	struct hevc_d_q_aux *aq;
1126 +
1127 +	spin_lock_irqsave(&ctx->aux_lock, lockflags);
1128 +	aq = ctx->aux_ents[q_index];
1129 +	if (aq)
1130 +		++aq->refcount;
1131 +	spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1132 +
1133 +	return aq;
1134 +}
1135 +
     +/* Take an extra reference on @aq (NULL-safe); returns @aq for chaining */
1136 +static struct hevc_d_q_aux *aux_q_ref(struct hevc_d_ctx *const ctx,
1137 +				      struct hevc_d_q_aux *const aq)
1138 +{
1139 +	unsigned long lockflags;
1140 +
1141 +	if (aq) {
1142 +		spin_lock_irqsave(&ctx->aux_lock, lockflags);
1143 +		++aq->refcount;
1144 +		spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1145 +	}
1146 +	return aq;
1147 +}
1148 +
     +/*
     + * Drop the caller's reference (*paq) and NULL the pointer.  On the last
     + * reference the entry is parked on the free list (its DMA buffer is
     + * kept for reuse; actual freeing happens in aux_q_uninit()).
     + */
1149 +static void aux_q_release(struct hevc_d_ctx *const ctx,
1150 +			  struct hevc_d_q_aux **const paq)
1151 +{
1152 +	struct hevc_d_q_aux *const aq = *paq;
1153 +	unsigned long lockflags;
1154 +
1155 +	if (!aq)
1156 +		return;
1157 +
1158 +	*paq = NULL;
1159 +
1160 +	spin_lock_irqsave(&ctx->aux_lock, lockflags);
1161 +	if (--aq->refcount == 0) {
1162 +		aq->next = ctx->aux_free;
1163 +		ctx->aux_free = aq;
1164 +		ctx->aux_ents[aq->q_index] = NULL;
1165 +		aq->q_index = ~0U;	/* mark as unbound */
1166 +	}
1167 +	spin_unlock_irqrestore(&ctx->aux_lock, lockflags);
1168 +}
1169 +
     +/* One-time init of the aux free list and its protecting lock */
1170 +static void aux_q_init(struct hevc_d_ctx *const ctx)
1171 +{
1172 +	spin_lock_init(&ctx->aux_lock);
1173 +	ctx->aux_free = NULL;
1174 +}
1175 +
     +/*
     + * Free all parked aux entries and reset the colmv sizing so a format
     + * change forces reallocation.  Entries still referenced via aux_ents
     + * are not touched here.
     + */
1176 +static void aux_q_uninit(struct hevc_d_ctx *const ctx)
1177 +{
1178 +	struct hevc_d_q_aux *aq;
1179 +
1180 +	ctx->colmv_picsize = 0;
1181 +	ctx->colmv_stride = 0;
1182 +	while ((aq = ctx->aux_free) != NULL) {
1183 +		ctx->aux_free = aq->next;
1184 +		aux_q_free(ctx, aq);
1185 +	}
1186 +}
1187 +
1188 +/*
1189 + * Initialisation process for context variables (CABAC init)
1190 + * see H.265 9.3.2.2
1191 + *
1192 + * N.B. If comparing with FFmpeg note that this h/w uses slightly different
1193 + * offsets to FFmpegs array
1194 + */
1195 +
1196 +/* Actual number of values */
1197 +#define RPI_PROB_VALS 154U
1198 +/* Rounded up as we copy words */
1199 +#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3)
1200 +
1201 +/* Initialiser values - see tables H.265 9-4 through 9-42 */
     +/* Rows indexed by init_type 0..2 as computed in write_prob() */
1202 +static const u8 prob_init[3][156] = {
1203 +	{
1204 +		153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154,
1205 +		154, 184, 63, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
1206 +		154, 154, 154, 153, 138, 138, 111, 141, 94, 138, 182, 154, 154,
1207 +		154, 140, 92, 137, 138, 140, 152, 138, 139, 153, 74, 149, 92,
1208 +		139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110,
1209 +		110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
1210 +		79, 108, 123, 63, 110, 110, 124, 125, 140, 153, 125, 127, 140,
1211 +		109, 111, 143, 127, 111, 79, 108, 123, 63, 91, 171, 134, 141,
1212 +		138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110,
1213 +		94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141,
1214 +		179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182,
1215 +		152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0, 0,
1216 +	},
1217 +	{
1218 +		153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154,
1219 +		154, 154, 152, 110, 122, 95, 79, 63, 31, 31, 153, 153, 168,
1220 +		140, 198, 79, 124, 138, 94, 153, 111, 149, 107, 167, 154, 154,
1221 +		154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1222 +		153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125,
1223 +		110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95,
1224 +		94, 108, 123, 108, 125, 110, 94, 110, 95, 79, 125, 111, 110,
1225 +		78, 110, 111, 111, 95, 94, 108, 123, 108, 121, 140, 61, 154,
1226 +		107, 167, 91, 122, 107, 167, 139, 139, 155, 154, 139, 153, 139,
1227 +		123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1228 +		136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123,
1229 +		107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
1230 +	},
1231 +	{
1232 +		153, 160, 107, 139, 126, 197, 185, 201, 154, 134, 154, 139, 154,
1233 +		154, 183, 152, 154, 137, 95, 79, 63, 31, 31, 153, 153, 168,
1234 +		169, 198, 79, 224, 167, 122, 153, 111, 149, 92, 167, 154, 154,
1235 +		154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
1236 +		153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125,
1237 +		110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111,
1238 +		79, 108, 123, 93, 125, 110, 124, 110, 95, 94, 125, 111, 111,
1239 +		79, 125, 126, 111, 111, 79, 108, 123, 93, 121, 140, 61, 154,
1240 +		107, 167, 91, 107, 107, 167, 139, 139, 170, 154, 139, 153, 139,
1241 +		123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140,
1242 +		136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138,
1243 +		122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
1244 +	},
1245 +};
1246 +
1247 +#define CMDS_WRITE_PROB ((RPI_PROB_ARRAY_SIZE / 4) + 1)
1248 +
     +/*
     + * Compute the CABAC context-variable initial states for this slice
     + * (H.265 9.3.2.2) from prob_init[] and the clipped slice QP, write them
     + * to the phase-1 prob array, then back them up in the h/w.
     + */
1249 +static void write_prob(struct hevc_d_dec_env *const de,
1250 +		       const struct hevc_d_dec_state *const s)
1251 +{
1252 +	const unsigned int init_type =
1253 +		((s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 &&
1254 +		 s->sh->slice_type != HEVC_SLICE_I) ?
1255 +			s->sh->slice_type + 1 :
1256 +			2 - s->sh->slice_type;
1257 +	const int q = clip_int(s->slice_qp, 0, 51);
1258 +	const u8 *p = prob_init[init_type];
1259 +	u8 dst[RPI_PROB_ARRAY_SIZE];
1260 +	unsigned int i;
1261 +
1262 +	for (i = 0; i < RPI_PROB_VALS; i++) {
1263 +		int init_value = p[i];
1264 +		int m = (init_value >> 4) * 5 - 45;
1265 +		int n = ((init_value & 15) << 3) - 16;
1266 +		int pre = 2 * (((m * q) >> 4) + n) - 127;
1267 +
1268 +		pre ^= pre >> 31;	/* if negative: pre = -pre - 1 */
1269 +		if (pre > 124)
1270 +			pre = 124 + (pre & 1);
1271 +		dst[i] = pre;
1272 +	}
1273 +	for (i = RPI_PROB_VALS; i != RPI_PROB_ARRAY_SIZE; ++i)
1274 +		dst[i] = 0;	/* zero-pad to a whole number of words */
1275 +
1276 +	for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4)
1277 +		p1_apb_write(de, 0x1000 + i,
1278 +			     dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) +
1279 +			     (dst[i + 3] << 24));
1280 +
1281 +	/*
1282 +	 * Having written the prob array back it up
1283 +	 * This is not always needed but is a small overhead that simplifies
1284 +	 * (and speeds up) some multi-tile & WPP scenarios
1285 +	 * There are no scenarios where having written a prob we ever want
1286 +	 * a previous (non-initial) state back
1287 +	 */
1288 +	p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1289 +}
1290 +
1291 +#define CMDS_WRITE_SCALING_FACTORS NUM_SCALING_FACTORS
     +/* Copy the pre-expanded scaling factor bytes to the APB, packed 4 bytes
     + * (little-endian) per word */
1292 +static void write_scaling_factors(struct hevc_d_dec_env *const de)
1293 +{
1294 +	const u8 *p = (u8 *)de->scaling_factors;
1295 +	int i;
1296 +
1297 +	for (i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4)
1298 +		p1_apb_write(de, 0x2000 + i,
1299 +			     p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24));
1300 +}
1301 +
     +/* The h/w takes AXI addresses in units of 64 bytes (addr >> 6) */
1302 +static inline __u32 dma_to_axi_addr(dma_addr_t a)
1303 +{
1304 +	return (__u32)(a >> 6);
1305 +}
1306 +
1307 +#define CMDS_WRITE_BITSTREAM 4
     +/*
     + * Point the bitstream fetcher at this slice's data.  The AXI base is
     + * 64-byte aligned; the residual byte offset within that 64-byte block
     + * is programmed via RPI_BFCONTROL.  Always returns 0.
     + */
1308 +static int write_bitstream(struct hevc_d_dec_env *const de,
1309 +			   const struct hevc_d_dec_state *const s)
1310 +{
1311 +	// FIXME!!!!
1312 +	// Note that FFmpeg V4L2 does not remove emulation prevention bytes,
1313 +	// so this is matched in the configuration here.
1314 +	// Whether that is the correct behaviour or not is not clear in the
1315 +	// spec.
1316 +	const int rpi_use_emu = 1;
1317 +	unsigned int offset = s->sh->data_byte_offset;
1318 +	const unsigned int len = (s->sh->bit_size + 7) / 8 - offset;
1319 +	dma_addr_t addr = s->src_addr + offset;
1320 +
1321 +	offset = addr & 63;	/* byte offset within the 64-byte AXI block */
1322 +
1323 +	p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr));
1324 +	p1_apb_write(de, RPI_BFNUM, len);
1325 +	p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop
1326 +	p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6));
1327 +	return 0;
1328 +}
1329 +
1330 +/*
1331 + * The slice constant part of the slice register - width and height need to
1332 + * be ORed in later as they are per-tile / WPP-row
1333 + */
     +/* Build the per-slice-constant part of the RPI_SLICE register value */
1334 +static u32 slice_reg_const(const struct hevc_d_dec_state *const s)
1335 +{
1336 +	u32 x = (s->max_num_merge_cand << 0) |
1337 +		(s->nb_refs[L0] << 4) |
1338 +		(s->nb_refs[L1] << 8) |
1339 +		(s->sh->slice_type << 12);
1340 +
1341 +	if (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA)
1342 +		x |= BIT(14);
1343 +	if (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA)
1344 +		x |= BIT(15);
1345 +	if (s->sh->slice_type == HEVC_SLICE_B &&
1346 +	    (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO))
1347 +		x |= BIT(16);
1348 +
1349 +	return x;
1350 +}
1351 +
1352 +#define CMDS_NEW_SLICE_SEGMENT (4 + CMDS_WRITE_SCALING_FACTORS)
1353 +
     +/*
     + * Program the SPS/PPS-derived registers for a new slice segment and set
     + * the slice start CTB.  For a dependent slice segment the previously
     + * latched reg_slicestart is reused.
     + */
1354 +static void new_slice_segment(struct hevc_d_dec_env *const de,
1355 +			      const struct hevc_d_dec_state *const s)
1356 +{
1357 +	const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
1358 +	const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
1359 +
1360 +	p1_apb_write(de,
1361 +		     RPI_SPS0,
1362 +		     ((sps->log2_min_luma_coding_block_size_minus3 + 3) << 0) |
1363 +		     (s->log2_ctb_size << 4) |
1364 +		     ((sps->log2_min_luma_transform_block_size_minus2 + 2)
1365 +		      << 8) |
1366 +		     ((sps->log2_min_luma_transform_block_size_minus2 + 2 +
1367 +		       sps->log2_diff_max_min_luma_transform_block_size)
1368 +		      << 12) |
1369 +		     ((sps->bit_depth_luma_minus8 + 8) << 16) |
1370 +		     ((sps->bit_depth_chroma_minus8 + 8) << 20) |
1371 +		     (sps->max_transform_hierarchy_depth_intra << 24) |
1372 +		     (sps->max_transform_hierarchy_depth_inter << 28));
1373 +
1374 +	p1_apb_write(de,
1375 +		     RPI_SPS1,
1376 +		     ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 0) |
1377 +		     ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) |
1378 +		     ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3)
1379 +		      << 8) |
1380 +		     ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 +
1381 +		       sps->log2_diff_max_min_pcm_luma_coding_block_size)
1382 +		      << 12) |
1383 +		     (((sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ?
1384 +			0 : sps->chroma_format_idc) << 16) |
1385 +		     ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)) << 18) |
1386 +		     ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) << 19) |
1387 +		     ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED))
1388 +		      << 20) |
1389 +		     ((!!(sps->flags &
1390 +			  V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED))
1391 +		      << 21));
1392 +
1393 +	p1_apb_write(de,
1394 +		     RPI_PPS,
1395 +		     ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) |
1396 +		     ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
1397 +		      << 4) |
1398 +		     ((!!(pps->flags &
1399 +			  V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED))
1400 +		      << 5) |
1401 +		     ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED))
1402 +		      << 6) |
1403 +		     ((!!(pps->flags &
1404 +			  V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED))
1405 +		      << 7) |
1406 +		     (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255)
1407 +		      << 8) |
1408 +		     (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255)
1409 +		      << 16) |
1410 +		     ((!!(pps->flags &
1411 +			  V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
1412 +		      << 24));
1413 +
     +	/* Scaling lists only need (re)writing on the first slice of a frame */
1414 +	if (!s->start_ts &&
1415 +	    (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
1416 +		write_scaling_factors(de);
1417 +
1418 +	if (!s->dependent_slice_segment_flag) {
1419 +		int ctb_col = s->sh->slice_segment_addr %
1420 +					de->pic_width_in_ctbs_y;
1421 +		int ctb_row = s->sh->slice_segment_addr /
1422 +					de->pic_width_in_ctbs_y;
1423 +
1424 +		de->reg_slicestart = (ctb_col << 0) + (ctb_row << 16);
1425 +	}
1426 +
1427 +	p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
1428 +}
1429 +
1430 +/* Slice messages */
1431 +
     +/* Append one 16-bit slice message; no bounds check here - callers
     + * budget space via CMDS_PROGRAM_SLICECMDS / SLICE_MSGS_MAX */
1432 +static void msg_slice(struct hevc_d_dec_env *const de, const u16 msg)
1433 +{
1434 +	de->slice_msgs[de->num_slice_msgs++] = msg;
1435 +}
1436 +
1437 +#define CMDS_PROGRAM_SLICECMDS (1 + SLICE_MSGS_MAX)
     +/* Write the accumulated slice messages to the h/w along with the count
     + * and slice id */
1438 +static void program_slicecmds(struct hevc_d_dec_env *const de,
1439 +			      const int sliceid)
1440 +{
1441 +	int i;
1442 +
1443 +	p1_apb_write(de, RPI_SLICECMDS, de->num_slice_msgs + (sliceid << 8));
1444 +
1445 +	for (i = 0; i < de->num_slice_msgs; i++)
1446 +		p1_apb_write(de, 0x4000 + 4 * i, de->slice_msgs[i] & 0xffff);
1447 +}
1448 +
1449 +/* NoBackwardPredictionFlag 8.3.5 - Simply checks POCs */
     +/* Returns 1 if none of the @n DPB entries selected by @idx has a POC
     + * after @cur_poc (i.e. all references are in the past) */
1450 +static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb,
1451 +			const __u8 *const idx, const unsigned int n,
1452 +			const s32 cur_poc)
1453 +{
1454 +	unsigned int i;
1455 +
1456 +	for (i = 0; i < n; ++i) {
1457 +		if (cur_poc < dpb[idx[i]].pic_order_cnt_val)
1458 +			return 0;
1459 +	}
1460 +	return 1;
1461 +}
1462 +
     +/*
     + * Build the slice message list for this slice: slice type / ref counts /
     + * collocated-MV info, then for P/B slices a descriptor (and, when
     + * weighted prediction is on, a weight table) per L0/L1 reference,
     + * followed by the deblocking and chroma QP offset messages.
     + */
1463 +static void pre_slice_decode(struct hevc_d_dec_env *const de,
1464 +			     const struct hevc_d_dec_state *const s)
1465 +{
1466 +	const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
1467 +	const struct v4l2_ctrl_hevc_decode_params *const dec = s->dec;
1468 +	int weighted_pred_flag, idx;
1469 +	u16 cmd_slice;
1470 +	unsigned int collocated_from_l0_flag;
1471 +
1472 +	de->num_slice_msgs = 0;
1473 +
1474 +	cmd_slice = 0;
1475 +	if (sh->slice_type == HEVC_SLICE_I)
1476 +		cmd_slice = 1;
1477 +	if (sh->slice_type == HEVC_SLICE_P)
1478 +		cmd_slice = 2;
1479 +	if (sh->slice_type == HEVC_SLICE_B)
1480 +		cmd_slice = 3;
1481 +
1482 +	cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) |
1483 +		     (s->max_num_merge_cand << 11);
1484 +
     +	/* Per spec: defaults to L0 unless a B slice says otherwise */
1485 +	collocated_from_l0_flag =
1486 +		!s->slice_temporal_mvp ||
1487 +		sh->slice_type != HEVC_SLICE_B ||
1488 +		(sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0);
1489 +	cmd_slice |= collocated_from_l0_flag << 14;
1490 +
1491 +	if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
1492 +		/* Flag to say all reference pictures are from the past */
1493 +		const int no_backward_pred_flag =
1494 +			has_backward(dec->dpb, sh->ref_idx_l0, s->nb_refs[L0],
1495 +				     sh->slice_pic_order_cnt) &&
1496 +			has_backward(dec->dpb, sh->ref_idx_l1, s->nb_refs[L1],
1497 +				     sh->slice_pic_order_cnt);
1498 +		cmd_slice |= no_backward_pred_flag << 10;
1499 +		msg_slice(de, cmd_slice);
1500 +
1501 +		if (s->slice_temporal_mvp) {
1502 +			const __u8 *const rpl = collocated_from_l0_flag ?
1503 +						sh->ref_idx_l0 : sh->ref_idx_l1;
1504 +			de->dpbno_col = rpl[sh->collocated_ref_idx];
1505 +		}
1506 +
1507 +		/* Write reference picture descriptions */
1508 +		weighted_pred_flag =
1509 +			sh->slice_type == HEVC_SLICE_P ?
1510 +				!!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) :
1511 +				!!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
1512 +
1513 +		for (idx = 0; idx < s->nb_refs[L0]; ++idx) {
1514 +			unsigned int dpb_no = sh->ref_idx_l0[idx];
1515 +
1516 +			msg_slice(de,
1517 +				  dpb_no |
1518 +				  ((dec->dpb[dpb_no].flags &
1519 +				    V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE) ?
1520 +					 (1 << 4) : 0) |
1521 +				  (weighted_pred_flag ? (3 << 5) : 0));
1522 +			msg_slice(de, dec->dpb[dpb_no].pic_order_cnt_val & 0xffff);
1523 +
1524 +			if (weighted_pred_flag) {
1525 +				const struct v4l2_hevc_pred_weight_table
1526 +					*const w = &sh->pred_weight_table;
1527 +				const int luma_weight_denom =
1528 +					(1 << w->luma_log2_weight_denom);
1529 +				const unsigned int chroma_log2_weight_denom =
1530 +					(w->luma_log2_weight_denom +
1531 +					 w->delta_chroma_log2_weight_denom);
1532 +				const int chroma_weight_denom =
1533 +					(1 << chroma_log2_weight_denom);
1534 +
1535 +				msg_slice(de,
1536 +					  w->luma_log2_weight_denom |
1537 +					  (((w->delta_luma_weight_l0[idx] +
1538 +					     luma_weight_denom) & 0x1ff)
1539 +					   << 3));
1540 +				msg_slice(de, w->luma_offset_l0[idx] & 0xff);
1541 +				msg_slice(de,
1542 +					  chroma_log2_weight_denom |
1543 +					  (((w->delta_chroma_weight_l0[idx][0] +
1544 +					     chroma_weight_denom) & 0x1ff)
1545 +					   << 3));
1546 +				msg_slice(de,
1547 +					  w->chroma_offset_l0[idx][0] & 0xff);
1548 +				msg_slice(de,
1549 +					  chroma_log2_weight_denom |
1550 +					  (((w->delta_chroma_weight_l0[idx][1] +
1551 +					     chroma_weight_denom) & 0x1ff)
1552 +					   << 3));
1553 +				msg_slice(de,
1554 +					  w->chroma_offset_l0[idx][1] & 0xff);
1555 +			}
1556 +		}
1557 +
     +		/* Same again for the L1 list */
1558 +		for (idx = 0; idx < s->nb_refs[L1]; ++idx) {
1559 +			unsigned int dpb_no = sh->ref_idx_l1[idx];
1560 +
1561 +			msg_slice(de,
1562 +				  dpb_no |
1563 +				  ((dec->dpb[dpb_no].flags &
1564 +				    V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE) ?
1565 +					 (1 << 4) : 0) |
1566 +				  (weighted_pred_flag ? (3 << 5) : 0));
1567 +			msg_slice(de, dec->dpb[dpb_no].pic_order_cnt_val & 0xffff);
1568 +			if (weighted_pred_flag) {
1569 +				const struct v4l2_hevc_pred_weight_table
1570 +					*const w = &sh->pred_weight_table;
1571 +				const int luma_weight_denom =
1572 +					(1 << w->luma_log2_weight_denom);
1573 +				const unsigned int chroma_log2_weight_denom =
1574 +					(w->luma_log2_weight_denom +
1575 +					 w->delta_chroma_log2_weight_denom);
1576 +				const int chroma_weight_denom =
1577 +					(1 << chroma_log2_weight_denom);
1578 +
1579 +				msg_slice(de,
1580 +					  w->luma_log2_weight_denom |
1581 +					  (((w->delta_luma_weight_l1[idx] +
1582 +					     luma_weight_denom) & 0x1ff) << 3));
1583 +				msg_slice(de, w->luma_offset_l1[idx] & 0xff);
1584 +				msg_slice(de,
1585 +					  chroma_log2_weight_denom |
1586 +					  (((w->delta_chroma_weight_l1[idx][0] +
1587 +					     chroma_weight_denom) & 0x1ff)
1588 +					   << 3));
1589 +				msg_slice(de,
1590 +					  w->chroma_offset_l1[idx][0] & 0xff);
1591 +				msg_slice(de,
1592 +					  chroma_log2_weight_denom |
1593 +					  (((w->delta_chroma_weight_l1[idx][1] +
1594 +					     chroma_weight_denom) & 0x1ff)
1595 +					   << 3));
1596 +				msg_slice(de,
1597 +					  w->chroma_offset_l1[idx][1] & 0xff);
1598 +			}
1599 +		}
1600 +	} else {
1601 +		msg_slice(de, cmd_slice);
1602 +	}
1603 +
1604 +	msg_slice(de,
1605 +		  (sh->slice_beta_offset_div2 & 15) |
1606 +		  ((sh->slice_tc_offset_div2 & 15) << 4) |
1607 +		  ((sh->flags &
1608 +		    V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ?
1609 +			1 << 8 : 0) |
1610 +		  ((sh->flags &
1611 +		    V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ?
1612 +			1 << 9 : 0) |
1613 +		  ((s->pps.flags &
1614 +		    V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ?
1615 +			1 << 10 : 0));
1616 +
1617 +	msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) +
1618 +		      (sh->slice_cb_qp_offset & 31)); /* CMD_QPOFF */
1619 +}
1620 +
1621 +#define CMDS_WRITE_SLICE 1
     +/*
     + * Write RPI_SLICE: the constant part plus the width/height of the last
     + * CTB in the run - full CTB size unless it is the clipped right/bottom
     + * edge CTB of the picture.
     + */
1622 +static void write_slice(struct hevc_d_dec_env *const de,
1623 +			const struct hevc_d_dec_state *const s,
1624 +			const u32 slice_const,
1625 +			const unsigned int ctb_col,
1626 +			const unsigned int ctb_row)
1627 +{
1628 +	const unsigned int cs = (1 << s->log2_ctb_size);
1629 +	const unsigned int w_last = s->sps.pic_width_in_luma_samples & (cs - 1);
1630 +	const unsigned int h_last = s->sps.pic_height_in_luma_samples & (cs - 1);
1631 +
1632 +	p1_apb_write(de, RPI_SLICE,
1633 +		     slice_const |
1634 +		     ((ctb_col + 1 < s->ctb_width || !w_last ?
1635 +				cs : w_last) << 17) |
1636 +		     ((ctb_row + 1 < s->ctb_height || !h_last ?
1637 +				cs : h_last) << 24));
1638 +}
1639 +
1640 +#define PAUSE_MODE_WPP 1
1641 +#define PAUSE_MODE_TILE 0xffff
1642 +
1643 +/*
1644 + * N.B. This can be called to fill in data from the previous slice so must not
1645 + * use any state data that may change from slice to slice (e.g. qp)
1646 + */
1647 +#define CMDS_NEW_ENTRY_POINT (6 + CMDS_WRITE_SLICE)
1648 +
     +/*
     + * Program a new decode entry point (tile or WPP row): tile extents,
     + * slice register, optional QP reset and the start CTB, then record the
     + * entry state in @de for later fill-in of skipped entries.
     + */
1649 +static void new_entry_point(struct hevc_d_dec_env *const de,
1650 +			    const struct hevc_d_dec_state *const s,
1651 +			    const bool do_bte,
1652 +			    const bool reset_qp_y,
1653 +			    const u32 pause_mode,
1654 +			    const unsigned int tile_x,
1655 +			    const unsigned int tile_y,
1656 +			    const unsigned int ctb_col,
1657 +			    const unsigned int ctb_row,
1658 +			    const unsigned int slice_qp,
1659 +			    const u32 slice_const)
1660 +{
1661 +	const unsigned int endx = s->col_bd[tile_x + 1] - 1;
1662 +	const unsigned int endy = (pause_mode == PAUSE_MODE_WPP) ?
1663 +		ctb_row : s->row_bd[tile_y + 1] - 1;
1664 +
1665 +	p1_apb_write(de, RPI_TILESTART,
1666 +		     s->col_bd[tile_x] | (s->row_bd[tile_y] << 16));
1667 +	p1_apb_write(de, RPI_TILEEND, endx | (endy << 16));
1668 +
1669 +	if (do_bte)
1670 +		p1_apb_write(de, RPI_BEGINTILEEND, endx | (endy << 16));
1671 +
1672 +	write_slice(de, s, slice_const, endx, endy);
1673 +
1674 +	if (reset_qp_y) {
1675 +		unsigned int sps_qp_bd_offset =
1676 +			6 * s->sps.bit_depth_luma_minus8;
1677 +
1678 +		p1_apb_write(de, RPI_QP, sps_qp_bd_offset + slice_qp);
1679 +	}
1680 +
     +	/* Bits 17/18 flag that this entry reaches the picture edge */
1681 +	p1_apb_write(de, RPI_MODE,
1682 +		     pause_mode |
1683 +		     ((endx == s->ctb_width - 1) << 17) |
1684 +		     ((endy == s->ctb_height - 1) << 18));
1685 +
1686 +	p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) | (ctb_row << 16));
1687 +
1688 +	de->entry_tile_x = tile_x;
1689 +	de->entry_tile_y = tile_y;
1690 +	de->entry_ctb_x = ctb_col;
1691 +	de->entry_ctb_y = ctb_row;
1692 +	de->entry_qp = slice_qp;
1693 +	de->entry_slice = slice_const;
1694 +}
1695 +
1696 +/* Wavefront mode */
1697 +
1698 +#define CMDS_WPP_PAUSE 4
     +/* Pause WPP at the CABAC-state save point (column 2) of @ctb_row and
     + * back up the probabilities for the next row */
1699 +static void wpp_pause(struct hevc_d_dec_env *const de, int ctb_row)
1700 +{
1701 +	p1_apb_write(de, RPI_STATUS, (ctb_row << 18) | 0x25);
1702 +	p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1703 +	p1_apb_write(de, RPI_MODE,
1704 +		     ctb_row == de->pic_height_in_ctbs_y - 1 ?
1705 +			0x70000 : 0x30000);
1706 +	p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
1707 +}
1708 +
1709 +#define CMDS_WPP_ENTRY_FILL_1 (CMDS_WPP_PAUSE + 2 + CMDS_NEW_ENTRY_POINT)
     +/*
     + * Emit pause/end/entry commands for every WPP row up to (excluding)
     + * @last_y, restoring the saved CABAC state at each new row.  Returns
     + * non-zero if the command FIFO lacks space.
     + */
1710 +static int wpp_entry_fill(struct hevc_d_dec_env *const de,
1711 +			  const struct hevc_d_dec_state *const s,
1712 +			  const unsigned int last_y)
1713 +{
1714 +	int rv;
1715 +	const unsigned int last_x = s->ctb_width - 1;
1716 +
1717 +	rv = cmds_check_space(de, CMDS_WPP_ENTRY_FILL_1 *
1718 +				  (last_y - de->entry_ctb_y));
1719 +	if (rv)
1720 +		return rv;
1721 +
1722 +	while (de->entry_ctb_y < last_y) {
1723 +		/* wpp_entry_x/y set by wpp_entry_point */
1724 +		if (s->ctb_width > 2)
1725 +			wpp_pause(de, de->entry_ctb_y);
1726 +		p1_apb_write(de, RPI_STATUS,
1727 +			     (de->entry_ctb_y << 18) | (last_x << 5) | 2);
1728 +
1729 +		/* if width == 1 then the saved state is the init one */
1730 +		if (s->ctb_width == 2)
1731 +			p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1732 +		else
1733 +			p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1734 +
1735 +		new_entry_point(de, s, false, true, PAUSE_MODE_WPP,
1736 +				0, 0, 0, de->entry_ctb_y + 1,
1737 +				de->entry_qp, de->entry_slice);
1738 +	}
1739 +	return 0;
1740 +}
1741 +
     +/*
     + * Close out the previous WPP slice: fill rows up to its last CTB row,
     + * pause/backup CABAC state where required, and write its expected end
     + * CTB to RPI_STATUS.  Returns non-zero on command FIFO exhaustion.
     + */
1742 +static int wpp_end_previous_slice(struct hevc_d_dec_env *const de,
1743 +				  const struct hevc_d_dec_state *const s)
1744 +{
1745 +	int rv;
1746 +
1747 +	rv = wpp_entry_fill(de, s, s->prev_ctb_y);
1748 +	if (rv)
1749 +		return rv;
1750 +
1751 +	rv = cmds_check_space(de, CMDS_WPP_PAUSE + 2);
1752 +	if (rv)
1753 +		return rv;
1754 +
1755 +	if (de->entry_ctb_x < 2 &&
1756 +	    (de->entry_ctb_y < s->start_ctb_y || s->start_ctb_x > 2) &&
1757 +	    s->ctb_width > 2)
1758 +		wpp_pause(de, s->prev_ctb_y);
1759 +	p1_apb_write(de, RPI_STATUS,
1760 +		     1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18));
1761 +	if (s->start_ctb_x == 2 ||
1762 +	    (s->ctb_width == 2 && de->entry_ctb_y < s->start_ctb_y))
1763 +		p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
1764 +	return 0;
1765 +}
1766 +
1767 +/*
1768 + * Only main profile supported so WPP => !Tiles which makes some of the
1769 + * next chunk code simpler
1770 + */
     +/*
     + * Queue the phase-1 commands for one slice in WPP mode: finish the
     + * previous slice, write bitstream/probs/slice messages and the new
     + * entry point; for the final slice also fill the remaining rows and
     + * terminate at the last CTB.  Returns non-zero on FIFO exhaustion.
     + */
1771 +static int wpp_decode_slice(struct hevc_d_dec_env *const de,
1772 +			    const struct hevc_d_dec_state *const s,
1773 +			    bool last_slice)
1774 +{
1775 +	bool reset_qp_y = true;
1776 +	const bool indep = !s->dependent_slice_segment_flag;
1777 +	int rv;
1778 +
1779 +	if (s->start_ts) {
1780 +		rv = wpp_end_previous_slice(de, s);
1781 +		if (rv)
1782 +			return rv;
1783 +	}
1784 +	pre_slice_decode(de, s);
1785 +
1786 +	rv = cmds_check_space(de,
1787 +			      CMDS_WRITE_BITSTREAM +
1788 +			      CMDS_WRITE_PROB +
1789 +			      CMDS_PROGRAM_SLICECMDS +
1790 +			      CMDS_NEW_SLICE_SEGMENT +
1791 +			      CMDS_NEW_ENTRY_POINT);
1792 +	if (rv)
1793 +		return rv;
1794 +
1795 +	rv = write_bitstream(de, s);
1796 +	if (rv)
1797 +		return rv;
1798 +
     +	/* Re-init, reload or keep the CABAC state as appropriate */
1799 +	if (!s->start_ts || indep || s->ctb_width == 1)
1800 +		write_prob(de, s);
1801 +	else if (!s->start_ctb_x)
1802 +		p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1803 +	else
1804 +		reset_qp_y = false;
1805 +
1806 +	program_slicecmds(de, s->slice_idx);
1807 +	new_slice_segment(de, s);
1808 +	new_entry_point(de, s, indep, reset_qp_y, PAUSE_MODE_WPP,
1809 +			0, 0, s->start_ctb_x, s->start_ctb_y,
1810 +			s->slice_qp, slice_reg_const(s));
1811 +
1812 +	if (last_slice) {
1813 +		rv = wpp_entry_fill(de, s, s->ctb_height - 1);
1814 +		if (rv)
1815 +			return rv;
1816 +
1817 +		rv = cmds_check_space(de, CMDS_WPP_PAUSE + 1);
1818 +		if (rv)
1819 +			return rv;
1820 +
1821 +		if (de->entry_ctb_x < 2 && s->ctb_width > 2)
1822 +			wpp_pause(de, s->ctb_height - 1);
1823 +
1824 +		p1_apb_write(de, RPI_STATUS,
1825 +			     1 | ((s->ctb_width - 1) << 5) |
1826 +				 ((s->ctb_height - 1) << 18));
1827 +	}
1828 +	return 0;
1829 +}
1830 +
1831 +/* Tiles mode */
1832 +
1833 +/* Guarantees 1 cmd entry free on exit */
     +/*
     + * Emit end-of-tile and entry-point commands for every tile between the
     + * current entry tile and (@last_tile_x, @last_tile_y), in raster order.
     + * Returns non-zero if the command FIFO runs out of space.
     + */
1834 +static int tile_entry_fill(struct hevc_d_dec_env *const de,
1835 +			   const struct hevc_d_dec_state *const s,
1836 +			   const unsigned int last_tile_x,
1837 +			   const unsigned int last_tile_y)
1838 +{
1839 +	while (de->entry_tile_y < last_tile_y ||
1840 +	       (de->entry_tile_y == last_tile_y &&
1841 +		de->entry_tile_x < last_tile_x)) {
1842 +		int rv;
1843 +		unsigned int t_x = de->entry_tile_x;
1844 +		unsigned int t_y = de->entry_tile_y;
1845 +		const unsigned int last_x = s->col_bd[t_x + 1] - 1;
1846 +		const unsigned int last_y = s->row_bd[t_y + 1] - 1;
1847 +
1848 +		/* One more than needed here */
1849 +		rv = cmds_check_space(de, CMDS_NEW_ENTRY_POINT + 3);
1850 +		if (rv)
1851 +			return rv;
1852 +
1853 +		p1_apb_write(de, RPI_STATUS,
1854 +			     2 | (last_x << 5) | (last_y << 18));
1855 +		p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
1856 +
1857 +		// Inc tile
1858 +		if (++t_x >= s->tile_width) {
1859 +			t_x = 0;
1860 +			++t_y;
1861 +		}
1862 +
1863 +		new_entry_point(de, s, false, true, PAUSE_MODE_TILE,
1864 +				t_x, t_y, s->col_bd[t_x], s->row_bd[t_y],
1865 +				de->entry_qp, de->entry_slice);
1866 +	}
1867 +	return 0;
1868 +}
1869 +
1870 +/* Write STATUS register with expected end CTU address of previous slice */
1871 +static int end_previous_slice(struct hevc_d_dec_env *const de,
1872 +			      const struct hevc_d_dec_state *const s)
1873 +{
1874 +	int rv;
1875 +
     +	/* Fill any tiles fully covered by the previous slice first */
1876 +	rv = tile_entry_fill(de, s,
1877 +			     ctb_to_tile_x(s, s->prev_ctb_x),
1878 +			     ctb_to_tile_y(s, s->prev_ctb_y));
1879 +	if (rv)
1880 +		return rv;
1881 +
1882 +	p1_apb_write(de, RPI_STATUS,
1883 +		     1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18));
1884 +	return 0;
1885 +}
1886 +
     +/*
     + * Queue the phase-1 commands for one slice in tiles (non-WPP) mode.
     + * Mirrors wpp_decode_slice(): end previous slice, write bitstream /
     + * probs / slice messages / entry point, and on the last slice fill the
     + * remaining tiles and terminate at the last CTB.  Returns non-zero on
     + * command FIFO exhaustion.
     + */
1887 +static int decode_slice(struct hevc_d_dec_env *const de,
1888 +			const struct hevc_d_dec_state *const s,
1889 +			bool last_slice)
1890 +{
1891 +	bool reset_qp_y;
1892 +	unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
1893 +	unsigned int tile_y = ctb_to_tile_y(s, s->start_ctb_y);
1894 +	int rv;
1895 +
1896 +	if (s->start_ts) {
1897 +		rv = end_previous_slice(de, s);
1898 +		if (rv)
1899 +			return rv;
1900 +	}
1901 +
1902 +	rv = cmds_check_space(de,
1903 +			      CMDS_WRITE_BITSTREAM +
1904 +			      CMDS_WRITE_PROB +
1905 +			      CMDS_PROGRAM_SLICECMDS +
1906 +			      CMDS_NEW_SLICE_SEGMENT +
1907 +			      CMDS_NEW_ENTRY_POINT);
1908 +	if (rv)
1909 +		return rv;
1910 +
1911 +	pre_slice_decode(de, s);
1912 +	rv = write_bitstream(de, s);
1913 +	if (rv)
1914 +		return rv;
1915 +
     +	/* CABAC/QP state must re-init unless continuing a dependent slice
     +	 * within the same tile */
1916 +	reset_qp_y = !s->start_ts ||
1917 +		!s->dependent_slice_segment_flag ||
1918 +		tile_x != ctb_to_tile_x(s, s->prev_ctb_x) ||
1919 +		tile_y != ctb_to_tile_y(s, s->prev_ctb_y);
1920 +	if (reset_qp_y)
1921 +		write_prob(de, s);
1922 +
1923 +	program_slicecmds(de, s->slice_idx);
1924 +	new_slice_segment(de, s);
1925 +	new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
1926 +			PAUSE_MODE_TILE,
1927 +			tile_x, tile_y, s->start_ctb_x, s->start_ctb_y,
1928 +			s->slice_qp, slice_reg_const(s));
1929 +
1930 +	/*
1931 +	 * If this is the last slice then fill in the other tile entries
1932 +	 * now, otherwise this will be done at the start of the next slice
1933 +	 * when it will be known where this slice finishes
1934 +	 */
1935 +	if (last_slice) {
1936 +		rv = tile_entry_fill(de, s,
1937 +				     s->tile_width - 1,
1938 +				     s->tile_height - 1);
1939 +		if (rv)
1940 +			return rv;
1941 +		p1_apb_write(de, RPI_STATUS,
1942 +			     1 | ((s->ctb_width - 1) << 5) |
1943 +				 ((s->ctb_height - 1) << 18));
1944 +	}
1945 +	return 0;
1946 +}
1947 +
1948 +/* Scaling factors */
1949 +
     +/*
     + * Expand a compressed scaling list into a full matrix:
     + * size_id 0/1 (4x4/8x8) are stored fully and copied as-is; size_id 2
     + * (16x16) and 3 (32x32) are up-sampled from an 8x8 source by pixel
     + * doubling/quadrupling, with element 0 replaced by the DC value @dc.
     + */
1950 +static void expand_scaling_list(const unsigned int size_id,
1951 +				u8 *const dst0,
1952 +				const u8 *const src0, uint8_t dc)
1953 +{
1954 +	u8 *d;
1955 +	unsigned int x, y;
1956 +
1957 +	switch (size_id) {
1958 +	case 0:
1959 +		memcpy(dst0, src0, 16);
1960 +		break;
1961 +	case 1:
1962 +		memcpy(dst0, src0, 64);
1963 +		break;
1964 +	case 2:
1965 +		d = dst0;
1966 +
1967 +		for (y = 0; y != 16; y++) {
1968 +			const u8 *s = src0 + (y >> 1) * 8;
1969 +
1970 +			for (x = 0; x != 8; ++x) {
1971 +				*d++ = *s;
1972 +				*d++ = *s++;	/* each src byte written twice */
1973 +			}
1974 +		}
1975 +		dst0[0] = dc;
1976 +		break;
1977 +	default:
1978 +		d = dst0;
1979 +
1980 +		for (y = 0; y != 32; y++) {
1981 +			const u8 *s = src0 + (y >> 2) * 8;
1982 +
1983 +			for (x = 0; x != 8; ++x) {
1984 +				*d++ = *s;
1985 +				*d++ = *s;
1986 +				*d++ = *s;
1987 +				*d++ = *s++;	/* each src byte written 4x */
1988 +			}
1989 +		}
1990 +		dst0[0] = dc;
1991 +		break;
1992 +	}
1993 +}
1994 +
     +/*
     + * Expand all scaling lists from the V4L2 scaling matrix control into
     + * de->scaling_factors at the h/w's expected offsets.  Only 2 of the 6
     + * 32x32 matrix ids exist, matching the V4L2/HEVC layout.
     + */
1995 +static void populate_scaling_factors(const struct hevc_d_run *const run,
1996 +				     struct hevc_d_dec_env *const de,
1997 +				     const struct hevc_d_dec_state *const s)
1998 +{
1999 +	const struct v4l2_ctrl_hevc_scaling_matrix *const sl =
2000 +		run->h265.scaling_matrix;
2001 +	/* Array of constants for scaling factors */
2002 +	static const u32 scaling_factor_offsets[4][6] = {
2003 +		/*
2004 +		 * MID0    MID1    MID2    MID3    MID4    MID5
2005 +		 */
2006 +		/* SID0 (4x4) */
2007 +		{ 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 },
2008 +		/* SID1 (8x8) */
2009 +		{ 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 },
2010 +		/* SID2 (16x16) */
2011 +		{ 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 },
2012 +		/* SID3 (32x32) */
2013 +		{ 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 }
2014 +	};
2015 +	unsigned int mid;
2016 +
2017 +	for (mid = 0; mid < 6; mid++)
2018 +		expand_scaling_list(0, de->scaling_factors +
2019 +					scaling_factor_offsets[0][mid],
2020 +				    sl->scaling_list_4x4[mid], 0);
2021 +	for (mid = 0; mid < 6; mid++)
2022 +		expand_scaling_list(1, de->scaling_factors +
2023 +					scaling_factor_offsets[1][mid],
2024 +				    sl->scaling_list_8x8[mid], 0);
2025 +	for (mid = 0; mid < 6; mid++)
2026 +		expand_scaling_list(2, de->scaling_factors +
2027 +					scaling_factor_offsets[2][mid],
2028 +				    sl->scaling_list_16x16[mid],
2029 +				    sl->scaling_list_dc_coef_16x16[mid]);
2030 +	for (mid = 0; mid < 2; mid++)
2031 +		expand_scaling_list(3, de->scaling_factors +
2032 +					scaling_factor_offsets[3][mid],
2033 +				    sl->scaling_list_32x32[mid],
2034 +				    sl->scaling_list_dc_coef_32x32[mid]);
2035 +}
2036 +
     +/* Free the derived parameter-set tables; pointers are NULLed so this is
     + * safe to call repeatedly (kfree(NULL) is a no-op) */
2037 +static void free_ps_info(struct hevc_d_dec_state *const s)
2038 +{
2039 +	kfree(s->ctb_addr_rs_to_ts);
2040 +	s->ctb_addr_rs_to_ts = NULL;
2041 +	kfree(s->ctb_addr_ts_to_rs);
2042 +	s->ctb_addr_ts_to_rs = NULL;
2043 +
2044 +	kfree(s->col_bd);
2045 +	s->col_bd = NULL;
2046 +	kfree(s->row_bd);
2047 +	s->row_bd = NULL;
2048 +}
2049 +
     +/* Width in CTBs of tile column @t_x */
2050 +static unsigned int tile_width(const struct hevc_d_dec_state *const s,
2051 +			       const unsigned int t_x)
2052 +{
2053 +	return s->col_bd[t_x + 1] - s->col_bd[t_x];
2054 +}
2055 +
     +/* Height in CTBs of tile row @t_y */
2056 +static unsigned int tile_height(const struct hevc_d_dec_state *const s,
2057 +				const unsigned int t_y)
2058 +{
2059 +	return s->row_bd[t_y + 1] - s->row_bd[t_y];
2060 +}
2061 +
     +/*
     + * Build the raster-scan <-> tile-scan CTB address maps by walking the
     + * tiles in raster order and, within each tile, the CTBs in raster
     + * order, assigning consecutive tile-scan addresses.
     + */
2062 +static void fill_rs_to_ts(struct hevc_d_dec_state *const s)
2063 +{
2064 +	unsigned int ts = 0;
2065 +	unsigned int t_y;
2066 +	unsigned int tr_rs = 0;	/* raster addr of current tile row start */
2067 +
2068 +	for (t_y = 0; t_y != s->tile_height; ++t_y) {
2069 +		const unsigned int t_h = tile_height(s, t_y);
2070 +		unsigned int t_x;
2071 +		unsigned int tc_rs = tr_rs;	/* raster addr of tile start */
2072 +
2073 +		for (t_x = 0; t_x != s->tile_width; ++t_x) {
2074 +			const unsigned int t_w = tile_width(s, t_x);
2075 +			unsigned int y;
2076 +			unsigned int rs = tc_rs;
2077 +
2078 +			for (y = 0; y != t_h; ++y) {
2079 +				unsigned int x;
2080 +
2081 +				for (x = 0; x != t_w; ++x) {
2082 +					s->ctb_addr_rs_to_ts[rs + x] = ts;
2083 +					s->ctb_addr_ts_to_rs[ts] = rs + x;
2084 +					++ts;
2085 +				}
2086 +				rs += s->ctb_width;
2087 +			}
2088 +			tc_rs += t_w;
2089 +		}
2090 +		tr_rs += t_h * s->ctb_width;
2091 +	}
2092 +}
2093 +
/*
 * Recompute everything derived from the SPS/PPS: CTB geometry, the
 * raster<->tile scan maps and the tile column/row boundary tables.
 * Called whenever the stored SPS or PPS changes.
 *
 * Returns 0 on success, -ENOMEM on allocation failure; on failure the
 * stored SPS is invalidated so the next frame forces a full reload.
 */
static int updated_ps(struct hevc_d_dec_state *const s)
{
	unsigned int i;

	free_ps_info(s);

	/* Inferred parameters */
	s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 +
			   s->sps.log2_diff_max_min_luma_coding_block_size;

	/* Frame size in CTBs, rounding up */
	s->ctb_width = (s->sps.pic_width_in_luma_samples +
			(1 << s->log2_ctb_size) - 1) >>
		       s->log2_ctb_size;
	s->ctb_height = (s->sps.pic_height_in_luma_samples +
			 (1 << s->log2_ctb_size) - 1) >>
			s->log2_ctb_size;
	s->ctb_size = s->ctb_width * s->ctb_height;

	s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
					     sizeof(*s->ctb_addr_rs_to_ts),
					     GFP_KERNEL);
	if (!s->ctb_addr_rs_to_ts)
		goto fail;
	s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
					     sizeof(*s->ctb_addr_ts_to_rs),
					     GFP_KERNEL);
	if (!s->ctb_addr_ts_to_rs)
		goto fail;

	/* No tiles => treat the whole frame as one 1x1 tile grid */
	if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
		s->tile_width = 1;
		s->tile_height = 1;
	} else {
		s->tile_width = s->pps.num_tile_columns_minus1 + 1;
		s->tile_height = s->pps.num_tile_rows_minus1 + 1;
	}

	/* Boundary tables carry one extra entry for the closing edge */
	s->col_bd = kmalloc_array((s->tile_width + 1), sizeof(*s->col_bd),
				  GFP_KERNEL);
	if (!s->col_bd)
		goto fail;
	s->row_bd = kmalloc_array((s->tile_height + 1), sizeof(*s->row_bd),
				  GFP_KERNEL);
	if (!s->row_bd)
		goto fail;

	s->col_bd[0] = 0;
	for (i = 1; i < s->tile_width; i++)
		s->col_bd[i] = s->col_bd[i - 1] +
			s->pps.column_width_minus1[i - 1] + 1;
	s->col_bd[s->tile_width] = s->ctb_width;

	s->row_bd[0] = 0;
	for (i = 1; i < s->tile_height; i++)
		s->row_bd[i] = s->row_bd[i - 1] +
			s->pps.row_height_minus1[i - 1] + 1;
	s->row_bd[s->tile_height] = s->ctb_height;

	fill_rs_to_ts(s);
	return 0;

fail:
	free_ps_info(s);
	/* Set invalid to force reload */
	s->sps.pic_width_in_luma_samples = 0;
	return -ENOMEM;
}
2161 +
/*
 * DMA-map the accumulated phase 1 command FIFO so the h/w can fetch it.
 * The mapping is released in dec_env_delete() (guarded by de->cmd_size).
 *
 * Returns 0 on success, -ENOMEM on mapping failure.
 * @s is currently unused by this function.
 */
static int write_cmd_buffer(struct hevc_d_dev *const dev,
			    struct hevc_d_dec_env *const de,
			    const struct hevc_d_dec_state *const s)
{
	/* Pad the mapped length up to the device cache alignment */
	const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]),
				      dev->cache_align);

	de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo,
				      cmd_size, DMA_TO_DEVICE);
	if (dma_mapping_error(dev->dev, de->cmd_addr)) {
		v4l2_err(&dev->v4l2_dev,
			 "Map cmd buffer (%zu): FAILED\n", cmd_size);
		return -ENOMEM;
	}
	de->cmd_size = cmd_size;
	return 0;
}
2179 +
/*
 * Size the collocated motion-vector buffer for the current SPS geometry.
 * Stride is the 64-aligned frame width; the picture size divides the
 * 64-aligned height by 16 - presumably one colmv row covers 16 luma
 * lines, TODO confirm against the h/w spec.
 * @run is unused at present.
 */
static void setup_colmv(struct hevc_d_ctx *const ctx, struct hevc_d_run *run,
			struct hevc_d_dec_state *const s)
{
	ctx->colmv_stride = ALIGN(s->sps.pic_width_in_luma_samples, 64);
	ctx->colmv_picsize = ctx->colmv_stride *
		(ALIGN(s->sps.pic_height_in_luma_samples, 64) >> 4);
}
2187 +
2188 +static struct hevc_d_dec_env *dec_env_new(struct hevc_d_ctx *const ctx)
2189 +{
2190 + struct hevc_d_dec_env *de;
2191 + unsigned long lock_flags;
2192 +
2193 + spin_lock_irqsave(&ctx->dec_lock, lock_flags);
2194 +
2195 + de = ctx->dec_free;
2196 + if (de) {
2197 + ctx->dec_free = de->next;
2198 + de->next = NULL;
2199 + de->state = HEVC_D_DECODE_SLICE_START;
2200 + }
2201 +
2202 + spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
2203 + return de;
2204 +}
2205 +
/*
 * Release a decode env back to the free pool.
 * Can be called from irq context.
 */
static void dec_env_delete(struct hevc_d_dec_env *const de)
{
	struct hevc_d_ctx * const ctx = de->ctx;
	unsigned long lock_flags;

	/* Unmap the command FIFO if write_cmd_buffer() mapped it */
	if (de->cmd_size) {
		dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size,
				 DMA_TO_DEVICE);
		de->cmd_size = 0;
	}

	/* Drop aux refs taken during setup */
	aux_q_release(ctx, &de->frame_aux);
	aux_q_release(ctx, &de->col_aux);

	/* Push back onto the context free list */
	spin_lock_irqsave(&ctx->dec_lock, lock_flags);

	de->state = HEVC_D_DECODE_END;
	de->next = ctx->dec_free;
	ctx->dec_free = de;

	spin_unlock_irqrestore(&ctx->dec_lock, lock_flags);
}
2229 +
2230 +static void dec_env_uninit(struct hevc_d_ctx *const ctx)
2231 +{
2232 + unsigned int i;
2233 +
2234 + if (ctx->dec_pool) {
2235 + for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) {
2236 + struct hevc_d_dec_env *const de = ctx->dec_pool + i;
2237 +
2238 + kfree(de->cmd_fifo);
2239 + }
2240 +
2241 + kfree(ctx->dec_pool);
2242 + }
2243 +
2244 + ctx->dec_pool = NULL;
2245 + ctx->dec_free = NULL;
2246 +}
2247 +
2248 +static int dec_env_init(struct hevc_d_ctx *const ctx)
2249 +{
2250 + unsigned int i;
2251 +
2252 + ctx->dec_pool = kzalloc(sizeof(*ctx->dec_pool) * HEVC_D_DEC_ENV_COUNT,
2253 + GFP_KERNEL);
2254 + if (!ctx->dec_pool)
2255 + return -1;
2256 +
2257 + spin_lock_init(&ctx->dec_lock);
2258 +
2259 + ctx->dec_free = ctx->dec_pool;
2260 + for (i = 0; i != HEVC_D_DEC_ENV_COUNT - 1; ++i)
2261 + ctx->dec_pool[i].next = ctx->dec_pool + i + 1;
2262 +
2263 + for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) {
2264 + struct hevc_d_dec_env *const de = ctx->dec_pool + i;
2265 +
2266 + de->ctx = ctx;
2267 + de->decode_order = i;
2268 + de->cmd_max = 8096;
2269 + de->cmd_fifo = kmalloc_array(de->cmd_max,
2270 + sizeof(struct rpi_cmd),
2271 + GFP_KERNEL);
2272 + if (!de->cmd_fifo)
2273 + goto fail;
2274 + }
2275 +
2276 + return 0;
2277 +
2278 +fail:
2279 + dec_env_uninit(ctx);
2280 + return -1;
2281 +}
2282 +
2283 +/*
2284 + * Assume that we get exactly the same DPB for every slice it makes no real
2285 + * sense otherwise.
2286 + */
2287 +#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16
2288 +#error HEVC_DPB_ENTRIES > h/w slots
2289 +#endif
2290 +
/*
 * Build the phase 2 CONFIG2 register value from the stored SPS/PPS.
 * Bit positions follow the h/w register layout.
 */
static u32 mk_config2(const struct hevc_d_dec_state *const s)
{
	const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
	const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
	u32 c;

	c = (sps->bit_depth_luma_minus8 + 8) << 0; /* BitDepthY */
	c |= (sps->bit_depth_chroma_minus8 + 8) << 4; /* BitDepthC */
	if (sps->bit_depth_luma_minus8) /* BitDepthY > 8 */
		c |= BIT(8);
	if (sps->bit_depth_chroma_minus8) /* BitDepthC > 8 */
		c |= BIT(9);
	c |= s->log2_ctb_size << 10;
	if (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		c |= BIT(13);
	if (sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)
		c |= BIT(14);
	if (s->mk_aux)
		c |= BIT(15); /* Write motion vectors to external memory */
	c |= (pps->log2_parallel_merge_level_minus2 + 2) << 16;
	if (s->slice_temporal_mvp)
		c |= BIT(19);
	if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED)
		c |= BIT(20);
	/* QP offsets are masked to signed 5-bit register fields */
	c |= (pps->pps_cb_qp_offset & 31) << 21;
	c |= (pps->pps_cr_qp_offset & 31) << 26;
	return c;
}
2319 +
static inline bool is_ref_unit_type(const unsigned int nal_unit_type)
{
	/*
	 * Table 7-1: the odd-numbered VCL types 1, 3, ..., 15 are
	 * reference pictures, as is everything from type 16 upwards.
	 */
	return (nal_unit_type & 1) != 0 || nal_unit_type >= 16;
}
2327 +
/*
 * Per-frame setup: validate controls and capture buffer, build the
 * phase 1 command FIFO for every slice in the run and resolve DPB /
 * aux (motion vector) references.
 *
 * On any error the claimed decode env (if any) is marked
 * HEVC_D_DECODE_ERROR_DONE; error reporting happens in trigger.
 */
void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run)
{
	struct hevc_d_dev *const dev = ctx->dev;
	const struct v4l2_ctrl_hevc_decode_params *const dec =
		run->h265.dec;
	/* sh0 used where slice header contents should be constant over all
	 * slices, or first slice of frame
	 */
	const struct v4l2_ctrl_hevc_slice_params *const sh0 =
		run->h265.slice_params;
	struct hevc_d_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
	struct hevc_d_dec_state *const s = ctx->state;
	struct vb2_queue *vq;
	struct hevc_d_dec_env *de = ctx->dec0;
	unsigned int prev_rs;
	unsigned int i;
	int rv;
	bool slice_temporal_mvp;
	unsigned int ctb_size_y;
	bool sps_changed = false;

	s->sh = NULL; /* Avoid use until in the slice loop */

	slice_temporal_mvp = (sh0->flags &
		V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);

	if (de) {
		v4l2_warn(&dev->v4l2_dev, "Decode env set unexpectedly");
		goto fail;
	}

	/* Frame start */

	if (!is_sps_set(run->h265.sps)) {
		v4l2_warn(&dev->v4l2_dev, "SPS never set\n");
		goto fail;
	}
	/* Can't check for PPS easily as all 0's looks valid */

	if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) {
		/* SPS changed */
		memcpy(&s->sps, run->h265.sps, sizeof(s->sps));
		sps_changed = true;
	}
	if (sps_changed ||
	    memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) {
		/* PPS changed (or an SPS change forces a recalc) */
		memcpy(&s->pps, run->h265.pps, sizeof(s->pps));

		/* Recalc stuff as required */
		rv = updated_ps(s);
		if (rv)
			goto fail;
	}

	de = dec_env_new(ctx);
	if (!de) {
		v4l2_err(&dev->v4l2_dev, "Failed to find free decode env\n");
		goto fail;
	}
	ctx->dec0 = de;

	ctb_size_y =
		1U << (s->sps.log2_min_luma_coding_block_size_minus3 +
		       3 + s->sps.log2_diff_max_min_luma_coding_block_size);

	de->pic_width_in_ctbs_y =
		(s->sps.pic_width_in_luma_samples + ctb_size_y - 1) /
			ctb_size_y; /* 7-15 */
	de->pic_height_in_ctbs_y =
		(s->sps.pic_height_in_luma_samples + ctb_size_y - 1) /
			ctb_size_y; /* 7-17 */
	de->cmd_len = 0;
	de->dpbno_col = ~0U;

	/* Column-format output: stride is height * 128 bytes per column
	 * - NOTE(review): confirm against the COL128 format definition
	 */
	de->luma_stride = ctx->dst_fmt.height * 128;
	de->frame_luma_addr =
		vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0);
	de->chroma_stride = de->luma_stride / 2;
	de->frame_chroma_addr =
		vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 1);
	de->frame_aux = NULL;

	if (s->sps.bit_depth_luma_minus8 !=
	    s->sps.bit_depth_chroma_minus8) {
		v4l2_warn(&dev->v4l2_dev,
			  "Chroma depth (%d) != Luma depth (%d)\n",
			  s->sps.bit_depth_chroma_minus8 + 8,
			  s->sps.bit_depth_luma_minus8 + 8);
		goto fail;
	}
	if (s->sps.bit_depth_luma_minus8 == 0) {
		if (ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_COL128) {
			v4l2_err(&dev->v4l2_dev,
				 "Pixel format %#x != NV12MT_COL128 for 8-bit output",
				 ctx->dst_fmt.pixelformat);
			goto fail;
		}
	} else if (s->sps.bit_depth_luma_minus8 == 2) {
		if (ctx->dst_fmt.pixelformat !=
					V4L2_PIX_FMT_NV12MT_10_COL128) {
			v4l2_err(&dev->v4l2_dev,
				 "Pixel format %#x != NV12MT_10_COL128 for 10-bit output",
				 ctx->dst_fmt.pixelformat);
			goto fail;
		}
	} else {
		v4l2_warn(&dev->v4l2_dev, "Luma depth (%d) unsupported\n",
			  s->sps.bit_depth_luma_minus8 + 8);
		goto fail;
	}
	if (run->dst->vb2_buf.num_planes != 2) {
		v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 2\n",
			  run->dst->vb2_buf.num_planes);
		goto fail;
	}
	if (run->dst->planes[0].length < ctx->dst_fmt.plane_fmt[0].sizeimage ||
	    run->dst->planes[1].length < ctx->dst_fmt.plane_fmt[1].sizeimage) {
		v4l2_warn(&dev->v4l2_dev,
			  "Capture planes length (%d/%d) < sizeimage (%d/%d)\n",
			  run->dst->planes[0].length,
			  run->dst->planes[1].length,
			  ctx->dst_fmt.plane_fmt[0].sizeimage,
			  ctx->dst_fmt.plane_fmt[1].sizeimage);
		goto fail;
	}

	/*
	 * Fill in ref planes with our address s.t. if we mess up refs
	 * somehow then we still have a valid address entry
	 */
	for (i = 0; i != 16; ++i) {
		de->ref_addrs[i][0] = de->frame_luma_addr;
		de->ref_addrs[i][1] = de->frame_chroma_addr;
	}

	/*
	 * Stash initial temporal_mvp flag
	 * This must be the same for all pic slices (7.4.7.1)
	 */
	s->slice_temporal_mvp = slice_temporal_mvp;

	/*
	 * Need Aux ents for all (ref) DPB ents if temporal MV could
	 * be enabled for any pic
	 */
	s->use_aux = ((s->sps.flags &
		      V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
	s->mk_aux = s->use_aux &&
		    (s->sps.sps_max_sub_layers_minus1 >= sh0->nuh_temporal_id_plus1 ||
		     is_ref_unit_type(sh0->nal_unit_type));

	/* Phase 2 reg pre-calc */
	de->rpi_config2 = mk_config2(s);
	de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
			    s->sps.pic_width_in_luma_samples;
	de->rpi_currpoc = sh0->slice_pic_order_cnt;

	if (s->sps.flags &
	    V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
		setup_colmv(ctx, run, s);
	}

	s->slice_idx = 0;

	/* A new frame must start at CTB 0 */
	if (sh0->slice_segment_addr != 0) {
		v4l2_warn(&dev->v4l2_dev,
			  "New frame but segment_addr=%d\n",
			  sh0->slice_segment_addr);
		goto fail;
	}

	/* Either map src buffer or use directly */
	s->src_addr = 0;

	s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
	if (!s->src_addr) {
		v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
		goto fail;
	}

	/* Pre calc parameters - per-slice loop */
	s->dec = dec;
	for (i = 0; i != run->h265.slice_ents; ++i) {
		const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i;
		const bool last_slice = i + 1 == run->h265.slice_ents;

		s->sh = sh;

		if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
			v4l2_warn(&dev->v4l2_dev,
				  "Bit size %d > bytesused %d\n",
				  sh->bit_size, run->src->planes[0].bytesused);
			goto fail;
		}
		if (sh->data_byte_offset >= sh->bit_size / 8) {
			v4l2_warn(&dev->v4l2_dev,
				  "Bit size %u < Byte offset %u * 8\n",
				  sh->bit_size, sh->data_byte_offset);
			goto fail;
		}

		s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta;
		s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
			0 :
			(5 - sh->five_minus_max_num_merge_cand);
		s->dependent_slice_segment_flag =
			((sh->flags &
			  V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);

		s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
			0 :
			sh->num_ref_idx_l0_active_minus1 + 1;
		s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
			0 :
			sh->num_ref_idx_l1_active_minus1 + 1;

		if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
			populate_scaling_factors(run, de, s);

		/* Calc all the random coord info to avoid repeated conversion in/out */
		s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
		s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
		s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
		/* Last CTB of previous slice */
		prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
		s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
		s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;

		/* WPP and tile/plain slices use different command builders */
		if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
			rv = wpp_decode_slice(de, s, last_slice);
		else
			rv = decode_slice(de, s, last_slice);
		if (rv)
			goto fail;

		++s->slice_idx;
	}

	/* Frame end */
	memset(dpb_q_aux, 0,
	       sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX);

	/*
	 * Locate ref frames
	 * At least in the current implementation this is constant across all
	 * slices. If this changes we will need idx mapping code.
	 * Uses sh so here rather than trigger
	 */

	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
			     V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);

	if (!vq) {
		v4l2_err(&dev->v4l2_dev, "VQ gone!\n");
		goto fail;
	}

	if (write_cmd_buffer(dev, de, s))
		goto fail;

	for (i = 0; i < dec->num_active_dpb_entries; ++i) {
		struct vb2_buffer *buf = vb2_find_buffer(vq, dec->dpb[i].timestamp);

		if (!buf) {
			v4l2_warn(&dev->v4l2_dev,
				  "Missing DPB ent %d, timestamp=%lld\n",
				  i, (long long)dec->dpb[i].timestamp);
			continue;
		}

		if (s->use_aux) {
			int buffer_index = buf->index;

			dpb_q_aux[i] = aux_q_ref_idx(ctx, buffer_index);
			if (!dpb_q_aux[i])
				v4l2_warn(&dev->v4l2_dev,
					  "Missing DPB AUX ent %d, timestamp=%lld, index=%d\n",
					  i, (long long)dec->dpb[i].timestamp,
					  buffer_index);
		}

		de->ref_addrs[i][0] =
			vb2_dma_contig_plane_dma_addr(buf, 0);
		de->ref_addrs[i][1] =
			vb2_dma_contig_plane_dma_addr(buf, 1);
	}

	/* Move DPB from temp */
	for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) {
		aux_q_release(ctx, &s->ref_aux[i]);
		s->ref_aux[i] = dpb_q_aux[i];
	}

	/* Unref the old frame aux too - it is either in the DPB or not now */
	aux_q_release(ctx, &s->frame_aux);

	if (s->mk_aux) {
		s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index);

		if (!s->frame_aux) {
			v4l2_err(&dev->v4l2_dev,
				 "Failed to obtain aux storage for frame\n");
			goto fail;
		}

		de->frame_aux = aux_q_ref(ctx, s->frame_aux);
	}

	if (de->dpbno_col != ~0U) {
		if (de->dpbno_col >= dec->num_active_dpb_entries) {
			v4l2_err(&dev->v4l2_dev,
				 "Col ref index %d >= %d\n",
				 de->dpbno_col,
				 dec->num_active_dpb_entries);
		} else {
			/* Standard requires that the col pic is constant for
			 * the duration of the pic (text of collocated_ref_idx
			 * in H265-2 2018 7.4.7.1)
			 */

			/* Spot the collocated ref in passing */
			de->col_aux = aux_q_ref(ctx,
						dpb_q_aux[de->dpbno_col]);

			if (!de->col_aux) {
				v4l2_warn(&dev->v4l2_dev,
					  "Missing DPB ent for col\n");
				/* Need to abort if this fails as P2 may
				 * explode on bad data
				 */
				goto fail;
			}
		}
	}

	de->state = HEVC_D_DECODE_PHASE1;
	return;

fail:
	if (de)
		// Actual error reporting happens in Trigger
		de->state = HEVC_D_DECODE_ERROR_DONE;
}
2672 +
2673 +/* Handle PU and COEFF stream overflow
2674 + *
2675 + * Returns:
2676 + * -1 Phase 1 decode error
2677 + * 0 OK
2678 + * >0 Out of space (bitmask)
2679 + */
2680 +
2681 +#define STATUS_COEFF_EXHAUSTED 8
2682 +#define STATUS_PU_EXHAUSTED 16
2683 +
2684 +static int check_status(const struct hevc_d_dev *const dev)
2685 +{
2686 + const u32 cfstatus = apb_read(dev, RPI_CFSTATUS);
2687 + const u32 cfnum = apb_read(dev, RPI_CFNUM);
2688 + u32 status = apb_read(dev, RPI_STATUS);
2689 +
2690 + /*
2691 + * Handle PU and COEFF stream overflow
2692 + * This is the definition of successful completion of phase 1.
2693 + * It assures that status register is zero and all blocks in each tile
2694 + * have completed
2695 + */
2696 + if (cfstatus == cfnum)
2697 + return 0;
2698 +
2699 + status &= (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED);
2700 + if (status)
2701 + return status;
2702 +
2703 + return -1;
2704 +}
2705 +
/*
 * Phase 2 completion IRQ: return the capture buffer, complete the media
 * request pinned at trigger time and recycle the decode env.
 */
static void phase2_cb(struct hevc_d_dev *const dev, void *v)
{
	struct hevc_d_dec_env *const de = v;

	/* Done with buffers - allow new P1 */
	hevc_d_hw_irq_active1_enable_claim(dev, 1);

	v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_DONE);
	de->frame_buf = NULL;

	media_request_manual_complete(de->req_pin);
	de->req_pin = NULL;

	dec_env_delete(de);
}
2721 +
/*
 * Phase 2 h/w claimed: program the reconstruction registers (phase 1
 * intermediate input, output frame, reference and collocated-MV
 * buffers) then start phase 2 by writing NUMROWS last.
 */
static void phase2_claimed(struct hevc_d_dev *const dev, void *v)
{
	struct hevc_d_dec_env *const de = v;
	unsigned int i;

	/* Phase 1 intermediate output becomes phase 2 input */
	apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc);
	apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride);
	apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc);
	apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride);

	apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_luma_addr);
	apb_write_vc_addr(dev, RPI_OUTCBASE, de->frame_chroma_addr);
	apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->luma_stride);
	apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->chroma_stride);

	for (i = 0; i < 16; i++) {
		// Strides are in fact unused but fill in anyway
		apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i][0]);
		apb_write_vc_len(dev, 0x9004 + 16 * i, de->luma_stride);
		apb_write_vc_addr(dev, 0x9008 + 16 * i, de->ref_addrs[i][1]);
		apb_write_vc_len(dev, 0x900C + 16 * i, de->chroma_stride);
	}

	apb_write(dev, RPI_CONFIG2, de->rpi_config2);
	apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize);
	apb_write(dev, RPI_CURRPOC, de->rpi_currpoc);

	/* collocated reads/writes */
	apb_write_vc_len(dev, RPI_COLSTRIDE,
			 de->ctx->colmv_stride);
	apb_write_vc_len(dev, RPI_MVSTRIDE,
			 de->ctx->colmv_stride);
	apb_write_vc_addr(dev, RPI_MVBASE,
			  !de->frame_aux ? 0 : de->frame_aux->col.addr);
	apb_write_vc_addr(dev, RPI_COLBASE,
			  !de->col_aux ? 0 : de->col_aux->col.addr);

	/* Register the completion IRQ before kicking the h/w */
	hevc_d_hw_irq_active2_irq(dev, &de->irq_ent, phase2_cb, de);

	apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
}
2763 +
2764 +static void phase1_claimed(struct hevc_d_dev *const dev, void *v);
2765 +
/* Release any and all objects associated with de and reenable phase 1 if
 * required.
 */
static void phase1_err_fin(struct hevc_d_dev *const dev,
			   struct hevc_d_ctx *const ctx,
			   struct hevc_d_dec_env *const de)
{
	/* Return all detached buffers */
	if (de->src_buf)
		v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_ERROR);
	de->src_buf = NULL;
	if (de->frame_buf)
		v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_ERROR);
	de->frame_buf = NULL;

	/* Complete the pinned media request even on the error path */
	if (de->req_pin)
		media_request_manual_complete(de->req_pin);
	de->req_pin = NULL;

	dec_env_delete(de);

	/* Reenable phase 0 if we were blocking */
	if (atomic_add_return(-1, &ctx->p1out) >= HEVC_D_P1BUF_COUNT - 1)
		v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);

	/* Done with P1-P2 buffers - allow new P1 */
	hevc_d_hw_irq_active1_enable_claim(dev, 1);
}
2794 +
/*
 * Thread-context continuation of phase 1 after an out-of-space status:
 * grow whichever of the PU / coeff buffers overflowed, then retry the
 * phase via phase1_claimed(). On realloc failure the decode is failed.
 */
static void phase1_thread(struct hevc_d_dev *const dev, void *v)
{
	struct hevc_d_dec_env *const de = v;
	struct hevc_d_ctx *const ctx = de->ctx;

	struct hevc_d_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx;
	struct hevc_d_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx;

	if (de->p1_status & STATUS_PU_EXHAUSTED) {
		if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) {
			v4l2_err(&dev->v4l2_dev,
				 "%s: PU realloc (%zx) failed\n",
				 __func__, pu_gptr->size);
			goto fail;
		}
		v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%zx) OK\n",
			  __func__, pu_gptr->size);
	}

	if (de->p1_status & STATUS_COEFF_EXHAUSTED) {
		if (gptr_realloc_new(dev, coeff_gptr,
				     next_size(coeff_gptr->size))) {
			v4l2_err(&dev->v4l2_dev,
				 "%s: Coeff realloc (%zx) failed\n",
				 __func__, coeff_gptr->size);
			goto fail;
		}
		v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%zx) OK\n",
			  __func__, coeff_gptr->size);
	}

	/* Retry phase 1 with the (possibly larger) buffers */
	phase1_claimed(dev, de);
	return;

fail:
	/*
	 * NOTE(review): presumably gptr_realloc_new can leave the gptr
	 * with no allocation on failure - in that case nothing further
	 * can be decoded on this ctx, hence fatal_err. Confirm.
	 */
	if (!pu_gptr->addr || !coeff_gptr->addr) {
		v4l2_err(&dev->v4l2_dev,
			 "%s: Fatal: failed to reclaim old alloc\n",
			 __func__);
		ctx->fatal_err = 1;
	}
	phase1_err_fin(dev, ctx, de);
}
2838 +
/*
 * Phase 1 completion handler: on success hand the env on to phase 2,
 * on buffer exhaustion punt to phase1_thread() for realloc, on hard
 * error fail the decode.
 * Always called in irq context (this is good).
 */
static void phase1_cb(struct hevc_d_dev *const dev, void *v)
{
	struct hevc_d_dec_env *const de = v;
	struct hevc_d_ctx *const ctx = de->ctx;

	de->p1_status = check_status(dev);

	if (de->p1_status != 0) {
		v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
			  __func__, de->p1_status);

		/* Negative status is a hard phase 1 decode error */
		if (de->p1_status < 0)
			goto fail;

		/* Need to realloc - push onto a thread rather than IRQ */
		hevc_d_hw_irq_active1_thread(dev, &de->irq_ent,
					     phase1_thread, de);
		return;
	}

	v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_DONE);
	de->src_buf = NULL;

	/* All phase1 error paths done - it is safe to inc p2idx */
	ctx->p2idx =
		(ctx->p2idx + 1 >= HEVC_D_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;

	/* Renable the next setup if we were blocking */
	if (atomic_add_return(-1, &ctx->p1out) >= HEVC_D_P1BUF_COUNT - 1)
		v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);

	hevc_d_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);

	return;

fail:
	phase1_err_fin(dev, ctx, de);
}
2878 +
/*
 * Phase 1 h/w claimed: program the PU / coefficient output buffers and
 * start the command FIFO. Completion arrives via phase1_cb() IRQ.
 */
static void phase1_claimed(struct hevc_d_dev *const dev, void *v)
{
	struct hevc_d_dec_env *const de = v;
	struct hevc_d_ctx *const ctx = de->ctx;

	const struct hevc_d_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx;
	const struct hevc_d_gptr * const coeff_gptr = ctx->coeff_bufs +
		ctx->p2idx;

	if (ctx->fatal_err)
		goto fail;

	/* Divide each buffer evenly across the CTB rows, 64-byte aligned */
	de->pu_base_vc = pu_gptr->addr;
	de->pu_stride =
		ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64);

	de->coeff_base_vc = coeff_gptr->addr;
	de->coeff_stride =
		ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);

	/* phase1_claimed blocked until cb_phase1 completed so p2idx inc
	 * in cb_phase1 after error detection
	 */

	apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
	apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
	apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
	apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride);

	/* Trigger command FIFO */
	apb_write(dev, RPI_CFNUM, de->cmd_len);

	/* Claim irq */
	hevc_d_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);

	/* Start the h/w */
	apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr);

	return;

fail:
	phase1_err_fin(dev, ctx, de);
}
2922 +
2923 +static void dec_state_delete(struct hevc_d_ctx *const ctx)
2924 +{
2925 + unsigned int i;
2926 + struct hevc_d_dec_state *const s = ctx->state;
2927 +
2928 + if (!s)
2929 + return;
2930 + ctx->state = NULL;
2931 +
2932 + free_ps_info(s);
2933 +
2934 + for (i = 0; i != HEVC_MAX_REFS; ++i)
2935 + aux_q_release(ctx, &s->ref_aux[i]);
2936 + aux_q_release(ctx, &s->frame_aux);
2937 +
2938 + kfree(s);
2939 +}
2940 +
/* Completion tracking for irq_sync() */
struct irq_sync {
	atomic_t done;				/* Set once the phase 2 claim has run */
	wait_queue_head_t wq;			/* Waited on by irq_sync() */
	struct hevc_d_hw_irq_ent irq_ent;	/* h/w IRQ queue entry */
};
2946 +
/* Phase 2 claim callback: flag completion and wake the irq_sync() waiter */
static void phase2_sync_claimed(struct hevc_d_dev *const dev, void *v)
{
	struct irq_sync *const sync = v;

	/* done must be set before the wakeup */
	atomic_set(&sync->done, 1);
	wake_up(&sync->wq);
}
2954 +
/* Phase 1 claim callback: repay the counted enable, then chain to phase 2 */
static void phase1_sync_claimed(struct hevc_d_dev *const dev, void *v)
{
	struct irq_sync *const sync = v;

	hevc_d_hw_irq_active1_enable_claim(dev, 1);
	hevc_d_hw_irq_active2_claim(dev, &sync->irq_ent, phase2_sync_claimed, sync);
}
2962 +
/* Sync with IRQ operations
 *
 * Claims phase1 and phase2 in turn and waits for the phase2 claim so any
 * pending IRQ ops will have completed by the time this returns
 *
 * phase1 has counted enables so must reenable once claimed
 * phase2 has unlimited enables
 */
static void irq_sync(struct hevc_d_dev *const dev)
{
	struct irq_sync sync;

	atomic_set(&sync.done, 0);
	init_waitqueue_head(&sync.wq);

	/* The claim chain (phase1 -> phase2) sets done and wakes us */
	hevc_d_hw_irq_active1_claim(dev, &sync.irq_ent, phase1_sync_claimed, &sync);
	wait_event(sync.wq, atomic_read(&sync.done));
}
2981 +
2982 +static void h265_ctx_uninit(struct hevc_d_dev *const dev, struct hevc_d_ctx *ctx)
2983 +{
2984 + unsigned int i;
2985 +
2986 + dec_env_uninit(ctx);
2987 + dec_state_delete(ctx);
2988 +
2989 + /*
2990 + * dec_env & state must be killed before this to release the buffer to
2991 + * the free pool
2992 + */
2993 + aux_q_uninit(ctx);
2994 +
2995 + for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
2996 + gptr_free(dev, ctx->pu_bufs + i);
2997 + for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
2998 + gptr_free(dev, ctx->coeff_bufs + i);
2999 +}
3000 +
/*
 * Stop decode on this context. irq_sync() must complete first so no
 * pending phase 1/2 IRQ callback can touch state we are about to free.
 */
void hevc_d_h265_stop(struct hevc_d_ctx *ctx)
{
	struct hevc_d_dev *const dev = ctx->dev;

	irq_sync(dev);
	h265_ctx_uninit(dev, ctx);
}
3008 +
3009 +int hevc_d_h265_start(struct hevc_d_ctx *ctx)
3010 +{
3011 + struct hevc_d_dev *const dev = ctx->dev;
3012 + unsigned int i;
3013 +
3014 + unsigned int w = ctx->dst_fmt.width;
3015 + unsigned int h = ctx->dst_fmt.height;
3016 + unsigned int wxh;
3017 + size_t pu_alloc;
3018 + size_t coeff_alloc;
3019 +
3020 + /* Generate a sanitised WxH for memory alloc. Assume HD if unset */
3021 + if (w == 0)
3022 + w = 1920;
3023 + if (w > 4096)
3024 + w = 4096;
3025 + if (h == 0)
3026 + h = 1088;
3027 + if (h > 4096)
3028 + h = 4096;
3029 + wxh = w * h;
3030 +
3031 + ctx->fatal_err = 0;
3032 + ctx->dec0 = NULL;
3033 + ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL);
3034 + if (!ctx->state) {
3035 + v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n");
3036 + goto fail;
3037 + }
3038 +
3039 + if (dec_env_init(ctx) != 0) {
3040 + v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n");
3041 + goto fail;
3042 + }
3043 +
3044 + coeff_alloc = hevc_d_round_up_size(wxh);
3045 + pu_alloc = hevc_d_round_up_size(wxh / 4);
3046 + for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
3047 + /* Don't actually need a kernel mapping here */
3048 + if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
3049 + DMA_ATTR_NO_KERNEL_MAPPING)) {
3050 + v4l2_err(&dev->v4l2_dev,
3051 + "Failed to alloc %#zx PU%d buffer\n",
3052 + pu_alloc, i);
3053 + goto fail;
3054 + }
3055 + if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
3056 + DMA_ATTR_NO_KERNEL_MAPPING)) {
3057 + v4l2_err(&dev->v4l2_dev,
3058 + "Failed to alloc %#zx Coeff%d buffer\n",
3059 + pu_alloc, i);
3060 + goto fail;
3061 + }
3062 + }
3063 + aux_q_init(ctx);
3064 +
3065 + return 0;
3066 +
3067 +fail:
3068 + h265_ctx_uninit(dev, ctx);
3069 + return -ENOMEM;
3070 +}
3071 +
/*
 * Kick off decode of the frame prepared by hevc_d_h265_setup():
 * either claim phase 1 h/w for a good env, or report the error and
 * finish the job for a failed/missing one.
 */
void hevc_d_h265_trigger(struct hevc_d_ctx *ctx)
{
	struct hevc_d_dev *const dev = ctx->dev;
	struct hevc_d_dec_env *const de = ctx->dec0;
	struct vb2_v4l2_buffer *src_buf;
	struct media_request *req;

	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	req = src_buf->vb2_buf.req_obj.req;

	/* A missing env is treated as the error-done state */
	switch (!de ? HEVC_D_DECODE_ERROR_DONE : de->state) {
	default:
		v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
			 de->state);
		fallthrough;
	case HEVC_D_DECODE_ERROR_DONE:
		ctx->dec0 = NULL;
		dec_env_delete(de);
		v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
						 VB2_BUF_STATE_ERROR);
		media_request_manual_complete(req);
		break;

	case HEVC_D_DECODE_PHASE1:
		ctx->dec0 = NULL;

		ctx->p1idx = (ctx->p1idx + 1 >= HEVC_D_P1BUF_COUNT) ?
			0 : ctx->p1idx + 1;

		/* We know we have src & dst so no need to test */
		de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
		de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
		/* Completed later by phase2_cb() / phase1_err_fin() */
		de->req_pin = req;

		/* We could get rid of the src buffer here if we've already
		 * copied it, but we don't copy the last buffer unless it
		 * didn't return a contig dma addr, and that shouldn't happen
		 */

		/* Enable the next setup if our Q isn't too big */
		if (atomic_add_return(1, &ctx->p1out) < HEVC_D_P1BUF_COUNT)
			v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);

		hevc_d_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
					    de);
		break;
	}
}
3120 +
3121 +static int try_ctrl_sps(struct v4l2_ctrl *ctrl)
3122 +{
3123 + const struct v4l2_ctrl_hevc_sps *const sps = ctrl->p_new.p_hevc_sps;
3124 + struct hevc_d_ctx *const ctx = ctrl->priv;
3125 + struct hevc_d_dev *const dev = ctx->dev;
3126 +
3127 + if (sps->chroma_format_idc != 1) {
3128 + v4l2_warn(&dev->v4l2_dev,
3129 + "Chroma format (%d) unsupported\n",
3130 + sps->chroma_format_idc);
3131 + return -EINVAL;
3132 + }
3133 +
3134 + if (sps->bit_depth_luma_minus8 != 0 &&
3135 + sps->bit_depth_luma_minus8 != 2) {
3136 + v4l2_warn(&dev->v4l2_dev,
3137 + "Luma depth (%d) unsupported\n",
3138 + sps->bit_depth_luma_minus8 + 8);
3139 + return -EINVAL;
3140 + }
3141 +
3142 + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) {
3143 + v4l2_warn(&dev->v4l2_dev,
3144 + "Chroma depth (%d) != Luma depth (%d)\n",
3145 + sps->bit_depth_chroma_minus8 + 8,
3146 + sps->bit_depth_luma_minus8 + 8);
3147 + return -EINVAL;
3148 + }
3149 +
3150 + if (!sps->pic_width_in_luma_samples ||
3151 + !sps->pic_height_in_luma_samples ||
3152 + sps->pic_width_in_luma_samples > 4096 ||
3153 + sps->pic_height_in_luma_samples > 4096) {
3154 + v4l2_warn(&dev->v4l2_dev,
3155 + "Bad sps width (%u) x height (%u)\n",
3156 + sps->pic_width_in_luma_samples,
3157 + sps->pic_height_in_luma_samples);
3158 + return -EINVAL;
3159 + }
3160 +
3161 + if (!ctx->dst_fmt_set)
3162 + return 0;
3163 +
3164 + if ((sps->bit_depth_luma_minus8 == 0 &&
3165 + ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_COL128) ||
3166 + (sps->bit_depth_luma_minus8 == 2 &&
3167 + ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_10_COL128)) {
3168 + v4l2_warn(&dev->v4l2_dev,
3169 + "SPS luma depth %d does not match capture format\n",
3170 + sps->bit_depth_luma_minus8 + 8);
3171 + return -EINVAL;
3172 + }
3173 +
3174 + if (sps->pic_width_in_luma_samples > ctx->dst_fmt.width ||
3175 + sps->pic_height_in_luma_samples > ctx->dst_fmt.height) {
3176 + v4l2_warn(&dev->v4l2_dev,
3177 + "SPS size (%dx%d) > capture size (%d,%d)\n",
3178 + sps->pic_width_in_luma_samples,
3179 + sps->pic_height_in_luma_samples,
3180 + ctx->dst_fmt.width,
3181 + ctx->dst_fmt.height);
3182 + return -EINVAL;
3183 + }
3184 +
3185 + return 0;
3186 +}
3187 +
3188 +const struct v4l2_ctrl_ops hevc_d_hevc_sps_ctrl_ops = {
3189 + .try_ctrl = try_ctrl_sps,
3190 +};
3191 +
3192 +static int try_ctrl_pps(struct v4l2_ctrl *ctrl)
3193 +{
3194 + const struct v4l2_ctrl_hevc_pps *const pps = ctrl->p_new.p_hevc_pps;
3195 + struct hevc_d_ctx *const ctx = ctrl->priv;
3196 + struct hevc_d_dev *const dev = ctx->dev;
3197 +
3198 + if ((pps->flags &
3199 + V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) &&
3200 + (pps->flags &
3201 + V4L2_HEVC_PPS_FLAG_TILES_ENABLED) &&
3202 + (pps->num_tile_columns_minus1 || pps->num_tile_rows_minus1)) {
3203 + v4l2_warn(&dev->v4l2_dev,
3204 + "WPP + Tiles not supported\n");
3205 + return -EINVAL;
3206 + }
3207 +
3208 + return 0;
3209 +}
3210 +
3211 +const struct v4l2_ctrl_ops hevc_d_hevc_pps_ctrl_ops = {
3212 + .try_ctrl = try_ctrl_pps,
3213 +};
3214 +
3215 +void hevc_d_device_run(void *priv)
3216 +{
3217 + struct hevc_d_ctx *const ctx = priv;
3218 + struct hevc_d_dev *const dev = ctx->dev;
3219 + struct hevc_d_run run = {};
3220 + struct media_request *src_req;
3221 +
3222 + run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
3223 + run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
3224 +
3225 + if (!run.src || !run.dst) {
3226 + v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
3227 + __func__, run.src, run.dst);
3228 + goto fail;
3229 + }
3230 +
3231 + /* Apply request(s) controls */
3232 + src_req = run.src->vb2_buf.req_obj.req;
3233 + if (!src_req) {
3234 + v4l2_err(&dev->v4l2_dev, "%s: Missing request\n", __func__);
3235 + goto fail;
3236 + }
3237 +
3238 + v4l2_ctrl_request_setup(src_req, &ctx->hdl);
3239 +
3240 + switch (ctx->src_fmt.pixelformat) {
3241 + case V4L2_PIX_FMT_HEVC_SLICE:
3242 + {
3243 + const struct v4l2_ctrl *ctrl;
3244 +
3245 + run.h265.sps =
3246 + hevc_d_find_control_data(ctx,
3247 + V4L2_CID_STATELESS_HEVC_SPS);
3248 + run.h265.pps =
3249 + hevc_d_find_control_data(ctx,
3250 + V4L2_CID_STATELESS_HEVC_PPS);
3251 + run.h265.dec =
3252 + hevc_d_find_control_data(ctx,
3253 + V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
3254 +
3255 + ctrl = hevc_d_find_ctrl(ctx,
3256 + V4L2_CID_STATELESS_HEVC_SLICE_PARAMS);
3257 + if (!ctrl || !ctrl->elems) {
3258 + v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n",
3259 + __func__);
3260 + goto fail;
3261 + }
3262 + run.h265.slice_ents = ctrl->elems;
3263 + run.h265.slice_params = ctrl->p_cur.p;
3264 +
3265 + run.h265.scaling_matrix =
3266 + hevc_d_find_control_data(ctx,
3267 + V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
3268 + break;
3269 + }
3270 +
3271 + default:
3272 + break;
3273 + }
3274 +
3275 + v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
3276 +
3277 + hevc_d_h265_setup(ctx, &run);
3278 +
3279 + /* Complete request(s) controls */
3280 + v4l2_ctrl_request_complete(src_req, &ctx->hdl);
3281 +
3282 + hevc_d_h265_trigger(ctx);
3283 + return;
3284 +
3285 +fail:
3286 + /* We really shouldn't get here but tidy up what we can */
3287 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
3288 + VB2_BUF_STATE_ERROR);
3289 + media_request_manual_complete(src_req);
3290 +}
3291 --- /dev/null
3292 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h
3293 @@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Raspberry Pi HEVC driver
 *
 * Copyright (C) 2024 Raspberry Pi Ltd
 *
 */

#ifndef _HEVC_D_H265_H_
#define _HEVC_D_H265_H_
#include "hevc_d.h"

/* try_ctrl validation ops for the stateless HEVC SPS / PPS controls */
extern const struct v4l2_ctrl_ops hevc_d_hevc_sps_ctrl_ops;
extern const struct v4l2_ctrl_ops hevc_d_hevc_pps_ctrl_ops;

/* Program per-run decode state (called from hevc_d_device_run) */
void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run);
/* Allocate / free the per-context decoder state and buffers */
int hevc_d_h265_start(struct hevc_d_ctx *ctx);
void hevc_d_h265_stop(struct hevc_d_ctx *ctx);
/* Start (or fail) the decode prepared by hevc_d_h265_setup() */
void hevc_d_h265_trigger(struct hevc_d_ctx *ctx);

/* v4l2-m2m device_run callback */
void hevc_d_device_run(void *priv);

#endif
3317 --- /dev/null
3318 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c
3319 @@ -0,0 +1,376 @@
3320 +// SPDX-License-Identifier: GPL-2.0
3321 +/*
3322 + * Raspberry Pi HEVC driver
3323 + *
3324 + * Copyright (C) 2024 Raspberry Pi Ltd
3325 + *
3326 + * Based on the Cedrus VPU driver, that is:
3327 + *
3328 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3329 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3330 + * Copyright (C) 2018 Bootlin
3331 + */
3332 +#include <linux/clk.h>
3333 +#include <linux/component.h>
3334 +#include <linux/dma-mapping.h>
3335 +#include <linux/interrupt.h>
3336 +#include <linux/io.h>
3337 +#include <linux/of_reserved_mem.h>
3338 +#include <linux/of_device.h>
3339 +#include <linux/of_platform.h>
3340 +#include <linux/platform_device.h>
3341 +#include <linux/regmap.h>
3342 +#include <linux/reset.h>
3343 +
3344 +#include <media/videobuf2-core.h>
3345 +#include <media/v4l2-mem2mem.h>
3346 +
3347 +#include <soc/bcm2835/raspberrypi-firmware.h>
3348 +
3349 +#include "hevc_d.h"
3350 +#include "hevc_d_hw.h"
3351 +
3352 +static void pre_irq(struct hevc_d_dev *dev, struct hevc_d_hw_irq_ent *ient,
3353 + hevc_d_irq_callback cb, void *v,
3354 + struct hevc_d_hw_irq_ctrl *ictl)
3355 +{
3356 + unsigned long flags;
3357 +
3358 + if (ictl->irq) {
3359 + v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n");
3360 + return;
3361 + }
3362 +
3363 + ient->cb = cb;
3364 + ient->v = v;
3365 +
3366 + spin_lock_irqsave(&ictl->lock, flags);
3367 + ictl->irq = ient;
3368 + ictl->no_sched++;
3369 + spin_unlock_irqrestore(&ictl->lock, flags);
3370 +}
3371 +
3372 +/* Should be called from inside ictl->lock */
3373 +static inline bool sched_enabled(const struct hevc_d_hw_irq_ctrl * const ictl)
3374 +{
3375 + return ictl->no_sched <= 0 && ictl->enable;
3376 +}
3377 +
3378 +/* Should be called from inside ictl->lock & after checking sched_enabled() */
3379 +static inline void set_claimed(struct hevc_d_hw_irq_ctrl * const ictl)
3380 +{
3381 + if (ictl->enable > 0)
3382 + --ictl->enable;
3383 + ictl->no_sched = 1;
3384 +}
3385 +
3386 +/* Should be called from inside ictl->lock */
3387 +static struct hevc_d_hw_irq_ent *get_sched(struct hevc_d_hw_irq_ctrl * const ictl)
3388 +{
3389 + struct hevc_d_hw_irq_ent *ient;
3390 +
3391 + if (!sched_enabled(ictl))
3392 + return NULL;
3393 +
3394 + ient = ictl->claim;
3395 + if (!ient)
3396 + return NULL;
3397 + ictl->claim = ient->next;
3398 +
3399 + set_claimed(ictl);
3400 + return ient;
3401 +}
3402 +
/*
 * Run a callback & check to see if there is anything else to run.
 *
 * Iterates rather than recurses: each callback runs without the lock
 * held, then no_sched is dropped and get_sched() supplies the next
 * runnable claim (or NULL to finish).
 */
static void sched_cb(struct hevc_d_dev * const dev,
		     struct hevc_d_hw_irq_ctrl * const ictl,
		     struct hevc_d_hw_irq_ent *ient)
{
	while (ient) {
		unsigned long flags;

		/* Callbacks must not be invoked with ictl->lock held */
		ient->cb(dev, ient->v);

		spin_lock_irqsave(&ictl->lock, flags);

		/*
		 * Always dec no_sched after cb exec - must have been set
		 * on entry to cb
		 */
		--ictl->no_sched;
		ient = get_sched(ictl);

		spin_unlock_irqrestore(&ictl->lock, flags);
	}
}
3425 +
3426 +/* Should only ever be called from its own IRQ cb so no lock required */
3427 +static void pre_thread(struct hevc_d_dev *dev,
3428 + struct hevc_d_hw_irq_ent *ient,
3429 + hevc_d_irq_callback cb, void *v,
3430 + struct hevc_d_hw_irq_ctrl *ictl)
3431 +{
3432 + ient->cb = cb;
3433 + ient->v = v;
3434 + ictl->irq = ient;
3435 + ictl->thread_reqed = true;
3436 + ictl->no_sched++; /* This is unwound in do_thread */
3437 +}
3438 +
3439 +/* Called in irq context */
3440 +static void do_irq(struct hevc_d_dev * const dev,
3441 + struct hevc_d_hw_irq_ctrl * const ictl)
3442 +{
3443 + struct hevc_d_hw_irq_ent *ient;
3444 + unsigned long flags;
3445 +
3446 + spin_lock_irqsave(&ictl->lock, flags);
3447 + ient = ictl->irq;
3448 + ictl->irq = NULL;
3449 + spin_unlock_irqrestore(&ictl->lock, flags);
3450 +
3451 + sched_cb(dev, ictl, ient);
3452 +}
3453 +
3454 +static void do_claim(struct hevc_d_dev * const dev,
3455 + struct hevc_d_hw_irq_ent *ient,
3456 + const hevc_d_irq_callback cb, void * const v,
3457 + struct hevc_d_hw_irq_ctrl * const ictl)
3458 +{
3459 + unsigned long flags;
3460 +
3461 + ient->next = NULL;
3462 + ient->cb = cb;
3463 + ient->v = v;
3464 +
3465 + spin_lock_irqsave(&ictl->lock, flags);
3466 +
3467 + if (ictl->claim) {
3468 + /* If we have a Q then add to end */
3469 + ictl->tail->next = ient;
3470 + ictl->tail = ient;
3471 + ient = NULL;
3472 + } else if (!sched_enabled(ictl)) {
3473 + /* Empty Q but other activity in progress so Q */
3474 + ictl->claim = ient;
3475 + ictl->tail = ient;
3476 + ient = NULL;
3477 + } else {
3478 + /*
3479 + * Nothing else going on - schedule immediately and
3480 + * prevent anything else scheduling claims
3481 + */
3482 + set_claimed(ictl);
3483 + }
3484 +
3485 + spin_unlock_irqrestore(&ictl->lock, flags);
3486 +
3487 + sched_cb(dev, ictl, ient);
3488 +}
3489 +
3490 +/* Enable n claims.
3491 + * n < 0 set to unlimited (default on init)
3492 + * n = 0 if previously unlimited then disable otherwise nop
3493 + * n > 0 if previously unlimited then set to n enables
3494 + * otherwise add n enables
3495 + * The enable count is automatically decremented every time a claim is run
3496 + */
3497 +static void do_enable_claim(struct hevc_d_dev * const dev,
3498 + int n,
3499 + struct hevc_d_hw_irq_ctrl * const ictl)
3500 +{
3501 + unsigned long flags;
3502 + struct hevc_d_hw_irq_ent *ient;
3503 +
3504 + spin_lock_irqsave(&ictl->lock, flags);
3505 + ictl->enable = n < 0 ? -1 : ictl->enable <= 0 ? n : ictl->enable + n;
3506 + ient = get_sched(ictl);
3507 + spin_unlock_irqrestore(&ictl->lock, flags);
3508 +
3509 + sched_cb(dev, ictl, ient);
3510 +}
3511 +
3512 +static void ictl_init(struct hevc_d_hw_irq_ctrl * const ictl, int enables)
3513 +{
3514 + spin_lock_init(&ictl->lock);
3515 + ictl->claim = NULL;
3516 + ictl->tail = NULL;
3517 + ictl->irq = NULL;
3518 + ictl->no_sched = 0;
3519 + ictl->enable = enables;
3520 + ictl->thread_reqed = false;
3521 +}
3522 +
3523 +static void ictl_uninit(struct hevc_d_hw_irq_ctrl * const ictl)
3524 +{
3525 + /* Nothing to do */
3526 +}
3527 +
/*
 * Top-half IRQ handler: acknowledge all asserted interrupt bits and
 * dispatch the registered phase callbacks.  Returns IRQ_WAKE_THREAD if
 * any callback requested threaded completion (via pre_thread()).
 */
static irqreturn_t hevc_d_irq_irq(int irq, void *data)
{
	struct hevc_d_dev * const dev = data;
	__u32 ictrl;

	ictrl = irq_read(dev, ARG_IC_ICTRL);
	if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) {
		v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n");
		return IRQ_NONE;
	}

	/* Cancel any/all irqs (write-1-to-clear; keep reserved bits zero) */
	irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK);

	/*
	 * Service Active2 before Active1 so Phase 1 can transition to Phase 2
	 * without delay
	 */
	if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET)
		do_irq(dev, &dev->ic_active2);
	if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET)
		do_irq(dev, &dev->ic_active1);

	/*
	 * NOTE(review): thread_reqed is read without ictl->lock here;
	 * pre_thread() says it is only set from within these callbacks,
	 * so the read should be current - confirm.
	 */
	return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ?
		IRQ_WAKE_THREAD : IRQ_HANDLED;
}
3554 +
3555 +static void do_thread(struct hevc_d_dev * const dev,
3556 + struct hevc_d_hw_irq_ctrl *const ictl)
3557 +{
3558 + unsigned long flags;
3559 + struct hevc_d_hw_irq_ent *ient = NULL;
3560 +
3561 + spin_lock_irqsave(&ictl->lock, flags);
3562 +
3563 + if (ictl->thread_reqed) {
3564 + ient = ictl->irq;
3565 + ictl->thread_reqed = false;
3566 + ictl->irq = NULL;
3567 + }
3568 +
3569 + spin_unlock_irqrestore(&ictl->lock, flags);
3570 +
3571 + sched_cb(dev, ictl, ient);
3572 +}
3573 +
3574 +static irqreturn_t hevc_d_irq_thread(int irq, void *data)
3575 +{
3576 + struct hevc_d_dev * const dev = data;
3577 +
3578 + do_thread(dev, &dev->ic_active1);
3579 + do_thread(dev, &dev->ic_active2);
3580 +
3581 + return IRQ_HANDLED;
3582 +}
3583 +
/*
 * May only be called from Active1 CB
 * IRQs should not be expected until execution continues in the cb
 */
void hevc_d_hw_irq_active1_thread(struct hevc_d_dev *dev,
				  struct hevc_d_hw_irq_ent *ient,
				  hevc_d_irq_callback thread_cb, void *ctx)
{
	pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1);
}

/* Adjust how many Active1 claims may run (see do_enable_claim for n) */
void hevc_d_hw_irq_active1_enable_claim(struct hevc_d_dev *dev,
					int n)
{
	do_enable_claim(dev, n, &dev->ic_active1);
}

/* Queue a claim on the Active1 (phase 1) interrupt control */
void hevc_d_hw_irq_active1_claim(struct hevc_d_dev *dev,
				 struct hevc_d_hw_irq_ent *ient,
				 hevc_d_irq_callback ready_cb, void *ctx)
{
	do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1);
}

/* Register the cb for the next Active1 interrupt; claim cb context only */
void hevc_d_hw_irq_active1_irq(struct hevc_d_dev *dev,
			       struct hevc_d_hw_irq_ent *ient,
			       hevc_d_irq_callback irq_cb, void *ctx)
{
	pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1);
}

/* Queue a claim on the Active2 (phase 2) interrupt control */
void hevc_d_hw_irq_active2_claim(struct hevc_d_dev *dev,
				 struct hevc_d_hw_irq_ent *ient,
				 hevc_d_irq_callback ready_cb, void *ctx)
{
	do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2);
}

/* Register the cb for the next Active2 interrupt; claim cb context only */
void hevc_d_hw_irq_active2_irq(struct hevc_d_dev *dev,
			       struct hevc_d_hw_irq_ent *ient,
			       hevc_d_irq_callback irq_cb, void *ctx)
{
	pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2);
}
3628 +
3629 +int hevc_d_hw_probe(struct hevc_d_dev *dev)
3630 +{
3631 + struct rpi_firmware *firmware;
3632 + struct device_node *node;
3633 + __u32 irq_stat;
3634 + int irq_dec;
3635 + int ret = 0;
3636 +
3637 + ictl_init(&dev->ic_active1, HEVC_D_P2BUF_COUNT);
3638 + ictl_init(&dev->ic_active2, HEVC_D_ICTL_ENABLE_UNLIMITED);
3639 +
3640 + dev->base_irq = devm_platform_ioremap_resource_byname(dev->pdev, "intc");
3641 + if (IS_ERR(dev->base_irq))
3642 + return PTR_ERR(dev->base_irq);
3643 +
3644 + dev->base_h265 = devm_platform_ioremap_resource_byname(dev->pdev, "hevc");
3645 + if (IS_ERR(dev->base_h265))
3646 + return PTR_ERR(dev->base_h265);
3647 +
3648 + dev->clock = devm_clk_get(&dev->pdev->dev, NULL);
3649 + if (IS_ERR(dev->clock))
3650 + return PTR_ERR(dev->clock);
3651 +
3652 + node = rpi_firmware_find_node();
3653 + if (!node)
3654 + return -EINVAL;
3655 +
3656 + firmware = rpi_firmware_get(node);
3657 + of_node_put(node);
3658 + if (!firmware)
3659 + return -EPROBE_DEFER;
3660 +
3661 + dev->max_clock_rate = rpi_firmware_clk_get_max_rate(firmware,
3662 + RPI_FIRMWARE_HEVC_CLK_ID);
3663 + rpi_firmware_put(firmware);
3664 +
3665 + dev->cache_align = dma_get_cache_alignment();
3666 +
3667 + /* Disable IRQs & reset anything pending */
3668 + irq_write(dev, 0,
3669 + ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
3670 + irq_stat = irq_read(dev, 0);
3671 + irq_write(dev, 0, irq_stat);
3672 +
3673 + irq_dec = platform_get_irq(dev->pdev, 0);
3674 + if (irq_dec <= 0)
3675 + return irq_dec;
3676 + ret = devm_request_threaded_irq(dev->dev, irq_dec,
3677 + hevc_d_irq_irq,
3678 + hevc_d_irq_thread,
3679 + 0, dev_name(dev->dev), dev);
3680 + if (ret)
3681 + dev_err(dev->dev, "Failed to request IRQ - %d\n", ret);
3682 +
3683 + return ret;
3684 +}
3685 +
/*
 * Tear down state created by hevc_d_hw_probe().  devm-managed resources
 * (IRQ, ioremaps, clock) are released automatically on driver detach.
 */
void hevc_d_hw_remove(struct hevc_d_dev *dev)
{
	/*
	 * IRQ auto freed on unload so no need to do it here
	 * ioremap auto freed on unload
	 */
	ictl_uninit(&dev->ic_active1);
	ictl_uninit(&dev->ic_active2);
}
3695 +
3696 --- /dev/null
3697 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h
3698 @@ -0,0 +1,303 @@
3699 +/* SPDX-License-Identifier: GPL-2.0 */
3700 +/*
3701 + * Raspberry Pi HEVC driver
3702 + *
3703 + * Copyright (C) 2024 Raspberry Pi Ltd
3704 + *
3705 + * Based on the Cedrus VPU driver, that is:
3706 + *
3707 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
3708 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
3709 + * Copyright (C) 2018 Bootlin
3710 + */
3711 +
3712 +#ifndef _HEVC_D_HW_H_
3713 +#define _HEVC_D_HW_H_
3714 +
/* One queued interrupt-callback registration (see hevc_d_hw.c claim Q) */
struct hevc_d_hw_irq_ent {
	struct hevc_d_hw_irq_ent *next;	/* Next entry in the claim queue */
	hevc_d_irq_callback cb;		/* Callback to invoke */
	void *v;			/* Opaque context passed to cb */
};
3720 +
3721 +/* Phase 1 Register offsets */
3722 +
3723 +#define RPI_SPS0 0
3724 +#define RPI_SPS1 4
3725 +#define RPI_PPS 8
3726 +#define RPI_SLICE 12
3727 +#define RPI_TILESTART 16
3728 +#define RPI_TILEEND 20
3729 +#define RPI_SLICESTART 24
3730 +#define RPI_MODE 28
3731 +#define RPI_LEFT0 32
3732 +#define RPI_LEFT1 36
3733 +#define RPI_LEFT2 40
3734 +#define RPI_LEFT3 44
3735 +#define RPI_QP 48
3736 +#define RPI_CONTROL 52
3737 +#define RPI_STATUS 56
3738 +#define RPI_VERSION 60
3739 +#define RPI_BFBASE 64
3740 +#define RPI_BFNUM 68
3741 +#define RPI_BFCONTROL 72
3742 +#define RPI_BFSTATUS 76
3743 +#define RPI_PUWBASE 80
3744 +#define RPI_PUWSTRIDE 84
3745 +#define RPI_COEFFWBASE 88
3746 +#define RPI_COEFFWSTRIDE 92
3747 +#define RPI_SLICECMDS 96
3748 +#define RPI_BEGINTILEEND 100
3749 +#define RPI_TRANSFER 104
3750 +#define RPI_CFBASE 108
3751 +#define RPI_CFNUM 112
3752 +#define RPI_CFSTATUS 116
3753 +
3754 +/* Phase 2 Register offsets */
3755 +
3756 +#define RPI_PURBASE 0x8000
3757 +#define RPI_PURSTRIDE 0x8004
3758 +#define RPI_COEFFRBASE 0x8008
3759 +#define RPI_COEFFRSTRIDE 0x800C
3760 +#define RPI_NUMROWS 0x8010
3761 +#define RPI_CONFIG2 0x8014
3762 +#define RPI_OUTYBASE 0x8018
3763 +#define RPI_OUTYSTRIDE 0x801C
3764 +#define RPI_OUTCBASE 0x8020
3765 +#define RPI_OUTCSTRIDE 0x8024
3766 +#define RPI_STATUS2 0x8028
3767 +#define RPI_FRAMESIZE 0x802C
3768 +#define RPI_MVBASE 0x8030
3769 +#define RPI_MVSTRIDE 0x8034
3770 +#define RPI_COLBASE 0x8038
3771 +#define RPI_COLSTRIDE 0x803C
3772 +#define RPI_CURRPOC 0x8040
3773 +
3774 +/*
3775 + * Write a general register value
3776 + * Order is unimportant
3777 + */
/*
 * Write a general register value
 * Order is unimportant
 */
static inline void apb_write(const struct hevc_d_dev * const dev,
			     const unsigned int offset, const u32 val)
{
	writel_relaxed(val, dev->base_h265 + offset);
}

/* Write the final register value that actually starts the phase */
static inline void apb_write_final(const struct hevc_d_dev * const dev,
				   const unsigned int offset, const u32 val)
{
	writel(val, dev->base_h265 + offset);
}

/* Read a decoder (APB) register */
static inline u32 apb_read(const struct hevc_d_dev * const dev,
			   const unsigned int offset)
{
	return readl(dev->base_h265 + offset);
}

/* Write an interrupt-controller register */
static inline void irq_write(const struct hevc_d_dev * const dev,
			     const unsigned int offset, const u32 val)
{
	writel(val, dev->base_irq + offset);
}

/* Read an interrupt-controller register */
static inline u32 irq_read(const struct hevc_d_dev * const dev,
			   const unsigned int offset)
{
	return readl(dev->base_irq + offset);
}

/*
 * Write a bus address in the hardware's address/64 form.  The low 6 bits
 * are discarded, so the address is presumably 64-byte aligned - TODO
 * confirm the allocators guarantee this.
 */
static inline void apb_write_vc_addr(const struct hevc_d_dev * const dev,
				     const unsigned int offset,
				     const dma_addr_t a)
{
	apb_write(dev, offset, (u32)(a >> 6));
}

/* As apb_write_vc_addr() but using the phase-starting (ordered) write */
static inline void apb_write_vc_addr_final(const struct hevc_d_dev * const dev,
					   const unsigned int offset,
					   const dma_addr_t a)
{
	apb_write_final(dev, offset, (u32)(a >> 6));
}

/* Write a byte length rounded up to 64-byte units */
static inline void apb_write_vc_len(const struct hevc_d_dev * const dev,
				    const unsigned int offset,
				    const unsigned int x)
{
	apb_write(dev, offset, (x + 63) >> 6);
}
3829 +
3830 +/* *ARG_IC_ICTRL - Interrupt control for ARGON Core*
3831 + * Offset (byte space) = 40'h2b10000
3832 + * Physical Address (byte space) = 40'h7eb10000
3833 + * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL
3834 + * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100
3835 + * Access = RW (32-bit only)
3836 + * Interrupt control logic for ARGON Core.
3837 + */
3838 +#define ARG_IC_ICTRL 0
3839 +
3840 +/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC
3841 + *
3842 + * Interrupt 1
3843 + * This is set and held when an hevc_active1 interrupt edge is detected
3844 + * The polarity of the edge is set by the ACTIVE1_EDGE field
3845 + * Write a 1 to this bit to clear down the latched interrupt
3846 + * The latched interrupt is only enabled out onto the interrupt line if
3847 + * ACTIVE1_EN is set
3848 + * Reset value is *0* decimal.
3849 + */
3850 +#define ARG_IC_ICTRL_ACTIVE1_INT_SET BIT(0)
3851 +
3852 +/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic
3853 + * This logic detects edges of the hevc_active1 line from the argon core
3854 + * 0 = negedge, 1 = posedge
3855 + * Reset value is *0* decimal.
3856 + */
3857 +#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET BIT(1)
3858 +
3859 +/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line.
3860 + * If this isn't set, the interrupt logic will work but no interrupt will be
3861 + * set to the interrupt controller
3862 + * Reset value is *1* decimal.
3863 + *
3864 + * [JC] The above appears to be a lie - if unset then b0 is never set
3865 + */
3866 +#define ARG_IC_ICTRL_ACTIVE1_EN_SET BIT(2)
3867 +
3868 +/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO
3869 + *
3870 + * The current status of the hevc_active1 signal
3871 + */
3872 +#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET BIT(3)
3873 +
3874 +/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC
3875 + *
3876 + * Interrupt 2
3877 + * This is set and held when an hevc_active2 interrupt edge is detected
3878 + * The polarity of the edge is set by the ACTIVE2_EDGE field
3879 + * Write a 1 to this bit to clear down the latched interrupt
3880 + * The latched interrupt is only enabled out onto the interrupt line if
3881 + * ACTIVE2_EN is set
3882 + * Reset value is *0* decimal.
3883 + */
3884 +#define ARG_IC_ICTRL_ACTIVE2_INT_SET BIT(4)
3885 +
3886 +/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic
3887 + * This logic detects edges of the hevc_active2 line from the argon core
3888 + * 0 = negedge, 1 = posedge
3889 + * Reset value is *0* decimal.
3890 + */
3891 +#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET BIT(5)
3892 +
3893 +/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line.
3894 + * If this isn't set, the interrupt logic will work but no interrupt will be
3895 + * set to the interrupt controller
3896 + * Reset value is *1* decimal.
3897 + */
3898 +#define ARG_IC_ICTRL_ACTIVE2_EN_SET BIT(6)
3899 +
3900 +/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO
3901 + *
3902 + * The current status of the hevc_active2 signal
3903 + */
3904 +#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET BIT(7)
3905 +
3906 +/* TEST_INT Forces the argon int high for test purposes.
3907 + * Reset value is *0* decimal.
3908 + */
3909 +#define ARG_IC_ICTRL_TEST_INT BIT(8)
3910 +#define ARG_IC_ICTRL_SPARE BIT(9)
3911 +
3912 +/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO
3913 + *
3914 + * The current status of the vp9_interrupt signal
3915 + */
3916 +#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS BIT(10)
3917 +
3918 +/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see
3919 + * it
3920 + * 0 = the AIO int is masked. (It should still be connected to the GIC though).
3921 + */
3922 +#define ARG_IC_ICTRL_AIO_INT_ENABLE BIT(20)
3923 +#define ARG_IC_ICTRL_H264_ACTIVE_INT BIT(21)
3924 +#define ARG_IC_ICTRL_H264_ACTIVE_EDGE BIT(22)
3925 +#define ARG_IC_ICTRL_H264_ACTIVE_EN BIT(23)
3926 +#define ARG_IC_ICTRL_H264_ACTIVE_STATUS BIT(24)
3927 +#define ARG_IC_ICTRL_H264_INTERRUPT_INT BIT(25)
3928 +#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE BIT(26)
3929 +#define ARG_IC_ICTRL_H264_INTERRUPT_EN BIT(27)
3930 +
3931 +/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO
3932 + *
3933 + * The current status of the h264_interrupt signal
3934 + */
3935 +#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS BIT(28)
3936 +
3937 +/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC
3938 + *
3939 + * Interrupt 1
3940 + * This is set and held when an vp9_interrupt interrupt edge is detected
3941 + * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field
3942 + * Write a 1 to this bit to clear down the latched interrupt
3943 + * The latched interrupt is only enabled out onto the interrupt line if
3944 + * VP9_INTERRUPT_EN is set
3945 + * Reset value is *0* decimal.
3946 + */
3947 +#define ARG_IC_ICTRL_VP9_INTERRUPT_INT BIT(29)
3948 +
3949 +/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic
3950 + * This logic detects edges of the vp9_interrupt line from the argon h264 core
3951 + * 0 = negedge, 1 = posedge
3952 + * Reset value is *0* decimal.
3953 + */
3954 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE BIT(30)
3955 +
3956 +/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line.
3957 + * If this isn't set, the interrupt logic will work but no interrupt will be
3958 + * set to the interrupt controller
3959 + * Reset value is *1* decimal.
3960 + */
3961 +#define ARG_IC_ICTRL_VP9_INTERRUPT_EN BIT(31)
3962 +
3963 +/* Bits 19:12, 11 reserved - read ?, write 0 */
3964 +#define ARG_IC_ICTRL_SET_ZERO_MASK ((0xff << 12) | BIT(11))
3965 +
3966 +/* All IRQ bits */
3967 +#define ARG_IC_ICTRL_ALL_IRQ_MASK (\
3968 + ARG_IC_ICTRL_VP9_INTERRUPT_INT |\
3969 + ARG_IC_ICTRL_H264_INTERRUPT_INT |\
3970 + ARG_IC_ICTRL_ACTIVE1_INT_SET |\
3971 + ARG_IC_ICTRL_ACTIVE2_INT_SET)
3972 +
3973 +/* Regulate claim Q */
3974 +void hevc_d_hw_irq_active1_enable_claim(struct hevc_d_dev *dev,
3975 + int n);
3976 +/* Auto release once all CBs called */
3977 +void hevc_d_hw_irq_active1_claim(struct hevc_d_dev *dev,
3978 + struct hevc_d_hw_irq_ent *ient,
3979 + hevc_d_irq_callback ready_cb, void *ctx);
3980 +/* May only be called in claim cb */
3981 +void hevc_d_hw_irq_active1_irq(struct hevc_d_dev *dev,
3982 + struct hevc_d_hw_irq_ent *ient,
3983 + hevc_d_irq_callback irq_cb, void *ctx);
3984 +/* May only be called in irq cb */
3985 +void hevc_d_hw_irq_active1_thread(struct hevc_d_dev *dev,
3986 + struct hevc_d_hw_irq_ent *ient,
3987 + hevc_d_irq_callback thread_cb, void *ctx);
3988 +
3989 +/* Auto release once all CBs called */
3990 +void hevc_d_hw_irq_active2_claim(struct hevc_d_dev *dev,
3991 + struct hevc_d_hw_irq_ent *ient,
3992 + hevc_d_irq_callback ready_cb, void *ctx);
3993 +/* May only be called in claim cb */
3994 +void hevc_d_hw_irq_active2_irq(struct hevc_d_dev *dev,
3995 + struct hevc_d_hw_irq_ent *ient,
3996 + hevc_d_irq_callback irq_cb, void *ctx);
3997 +
3998 +int hevc_d_hw_probe(struct hevc_d_dev *dev);
3999 +void hevc_d_hw_remove(struct hevc_d_dev *dev);
4000 +
4001 +#endif
4002 --- /dev/null
4003 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c
4004 @@ -0,0 +1,688 @@
4005 +// SPDX-License-Identifier: GPL-2.0
4006 +/*
4007 + * Raspberry Pi HEVC driver
4008 + *
4009 + * Copyright (C) 2024 Raspberry Pi Ltd
4010 + *
4011 + * Based on the Cedrus VPU driver, that is:
4012 + *
4013 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
4014 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
4015 + * Copyright (C) 2018 Bootlin
4016 + */
4017 +
4018 +#include <media/videobuf2-dma-contig.h>
4019 +#include <media/v4l2-device.h>
4020 +#include <media/v4l2-ioctl.h>
4021 +#include <media/v4l2-event.h>
4022 +#include <media/v4l2-mem2mem.h>
4023 +
4024 +#include "hevc_d.h"
4025 +#include "hevc_d_h265.h"
4026 +#include "hevc_d_hw.h"
4027 +#include "hevc_d_video.h"
4028 +
4029 +#define HEVC_D_DECODE_SRC BIT(0)
4030 +#define HEVC_D_DECODE_DST BIT(1)
4031 +
4032 +#define HEVC_D_MIN_WIDTH 16U
4033 +#define HEVC_D_MIN_HEIGHT 16U
4034 +#define HEVC_D_DEFAULT_WIDTH 1920U
4035 +#define HEVC_D_DEFAULT_HEIGHT 1088U
4036 +#define HEVC_D_MAX_WIDTH 4096U
4037 +#define HEVC_D_MAX_HEIGHT 4096U
4038 +
/* Recover the driver context from a v4l2 file handle */
static inline struct hevc_d_ctx *hevc_d_file2ctx(struct file *file)
{
	return container_of(file->private_data, struct hevc_d_ctx, fh);
}

/* constrain x to y,y*2 */
/*
 * NOTE(review): a value above y*2 returns y (the lower bound), it is NOT
 * clamped to y*2 - i.e. out-of-range-high falls back to the default.
 * Confirm this is intended given the function name.
 */
static inline unsigned int constrain2x(unsigned int x, unsigned int y)
{
	return (x < y) ?
			y :
		(x > y * 2) ? y : x;
}
4051 +
4052 +size_t hevc_d_round_up_size(const size_t x)
4053 +{
4054 + /* Admit no size < 256 */
4055 + const unsigned int n = x < 256 ? 8 : ilog2(x);
4056 +
4057 + return x >= (3 << n) ? 4 << n : (3 << n);
4058 +}
4059 +
/*
 * Worst-case compressed bitstream buffer size for a w x h 4:2:0 frame
 * at (8 + bits_minus8)-bit depth, rounded via hevc_d_round_up_size().
 */
size_t hevc_d_bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
{
	const size_t wxh = w * h;
	size_t bits_alloc;

	/* Annex A gives a min compression of 2 @ lvl 3.1
	 * (wxh <= 983040) and min 4 thereafter but avoid
	 * the oddity of 983041 having a lower limit than
	 * 983040.
	 * Multiply by 3/2 for 4:2:0
	 */
	bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
		wxh < 983040 * 2 ? 983040 * 3 / 4 :
		wxh * 3 / 8;
	/* Scale up proportionally for >8-bit depths */
	bits_alloc += (bits_alloc * bits_minus8) / 8;
	return hevc_d_round_up_size(bits_alloc);
}
4078 +
4079 +void hevc_d_prepare_src_format(struct v4l2_pix_format_mplane *pix_fmt)
4080 +{
4081 + size_t size;
4082 + u32 w;
4083 + u32 h;
4084 +
4085 + w = pix_fmt->width;
4086 + h = pix_fmt->height;
4087 + if (!w || !h) {
4088 + w = HEVC_D_DEFAULT_WIDTH;
4089 + h = HEVC_D_DEFAULT_HEIGHT;
4090 + }
4091 + if (w > HEVC_D_MAX_WIDTH)
4092 + w = HEVC_D_MAX_WIDTH;
4093 + if (h > HEVC_D_MAX_HEIGHT)
4094 + h = HEVC_D_MAX_HEIGHT;
4095 +
4096 + if (!pix_fmt->plane_fmt[0].sizeimage ||
4097 + pix_fmt->plane_fmt[0].sizeimage > SZ_32M) {
4098 + /* Unspecified or way too big - pick max for size */
4099 + size = hevc_d_bit_buf_size(w, h, 2);
4100 + }
4101 + /* Set a minimum */
4102 + size = max_t(u32, SZ_4K, pix_fmt->plane_fmt[0].sizeimage);
4103 +
4104 + pix_fmt->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
4105 + pix_fmt->width = w;
4106 + pix_fmt->height = h;
4107 + pix_fmt->num_planes = 1;
4108 + pix_fmt->field = V4L2_FIELD_NONE;
4109 + /* Zero bytes per line for encoded source. */
4110 + pix_fmt->plane_fmt[0].bytesperline = 0;
4111 + pix_fmt->plane_fmt[0].sizeimage = size;
4112 +}
4113 +
4114 +/* Take any pix_format and make it valid */
4115 +static void hevc_d_prepare_dst_format(struct v4l2_pix_format_mplane *pix_fmt)
4116 +{
4117 + unsigned int width = pix_fmt->width;
4118 + unsigned int height = pix_fmt->height;
4119 + unsigned int sizeimage = pix_fmt->plane_fmt[0].sizeimage;
4120 + unsigned int bytesperline = pix_fmt->plane_fmt[0].bytesperline;
4121 +
4122 + if (!width)
4123 + width = HEVC_D_DEFAULT_WIDTH;
4124 + if (width > HEVC_D_MAX_WIDTH)
4125 + width = HEVC_D_MAX_WIDTH;
4126 + if (!height)
4127 + height = HEVC_D_DEFAULT_HEIGHT;
4128 + if (height > HEVC_D_MAX_HEIGHT)
4129 + height = HEVC_D_MAX_HEIGHT;
4130 +
4131 + /* For column formats set bytesperline to column height (stride2) */
4132 + switch (pix_fmt->pixelformat) {
4133 + default:
4134 + pix_fmt->pixelformat = V4L2_PIX_FMT_NV12MT_COL128;
4135 + fallthrough;
4136 + case V4L2_PIX_FMT_NV12MT_COL128:
4137 + /* Width rounds up to columns */
4138 + width = ALIGN(width, 128);
4139 + height = ALIGN(height, 8);
4140 +
4141 + /* column height is sizeimage / bytesperline */
4142 + bytesperline = width;
4143 + sizeimage = bytesperline * height;
4144 + break;
4145 +
4146 + case V4L2_PIX_FMT_NV12MT_10_COL128:
4147 + /* width in pixels (3 pels = 4 bytes) rounded to 128 byte
4148 + * columns
4149 + */
4150 + width = ALIGN(((width + 2) / 3), 32) * 3;
4151 + height = ALIGN(height, 8);
4152 +
4153 + /* column height is sizeimage / bytesperline */
4154 + bytesperline = width * 4 / 3;
4155 + sizeimage = bytesperline * height;
4156 + break;
4157 + }
4158 +
4159 + pix_fmt->width = width;
4160 + pix_fmt->height = height;
4161 +
4162 + pix_fmt->field = V4L2_FIELD_NONE;
4163 + pix_fmt->plane_fmt[0].bytesperline = bytesperline;
4164 + pix_fmt->plane_fmt[0].sizeimage = sizeimage;
4165 + pix_fmt->plane_fmt[1].bytesperline = bytesperline;
4166 + pix_fmt->plane_fmt[1].sizeimage = sizeimage / 2;
4167 + pix_fmt->num_planes = 2;
4168 +}
4169 +
4170 +static int hevc_d_querycap(struct file *file, void *priv,
4171 + struct v4l2_capability *cap)
4172 +{
4173 + strscpy(cap->driver, HEVC_D_NAME, sizeof(cap->driver));
4174 + strscpy(cap->card, HEVC_D_NAME, sizeof(cap->card));
4175 + snprintf(cap->bus_info, sizeof(cap->bus_info),
4176 + "platform:%s", HEVC_D_NAME);
4177 +
4178 + return 0;
4179 +}
4180 +
4181 +static int hevc_d_enum_fmt_vid_out(struct file *file, void *priv,
4182 + struct v4l2_fmtdesc *f)
4183 +{
4184 + /*
4185 + * Input formats
4186 + * H.265 Slice only
4187 + */
4188 + if (f->index == 0) {
4189 + f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
4190 + return 0;
4191 + }
4192 +
4193 + return -EINVAL;
4194 +}
4195 +
/*
 * Check an SPS against both local hardware limits and the bounds from
 * H.265 section 7.4.3.2.1.  Returns 1 if usable, 0 otherwise.
 */
static int hevc_d_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps)
{
	/* log2 of the (maximum) luma coding tree block size */
	const unsigned int ctb_log2_size_y =
		sps->log2_min_luma_coding_block_size_minus3 + 3 +
		sps->log2_diff_max_min_luma_coding_block_size;
	/* log2 of the min/max luma transform block sizes */
	const unsigned int min_tb_log2_size_y =
		sps->log2_min_luma_transform_block_size_minus2 + 2;
	const unsigned int max_tb_log2_size_y = min_tb_log2_size_y +
		sps->log2_diff_max_min_luma_transform_block_size;

	/* Local limitations */
	/* Frame dimensions must be within 32..4096 in each direction */
	if (sps->pic_width_in_luma_samples < 32 ||
	    sps->pic_width_in_luma_samples > 4096)
		return 0;
	if (sps->pic_height_in_luma_samples < 32 ||
	    sps->pic_height_in_luma_samples > 4096)
		return 0;
	/* Only 8-bit (minus8 == 0) and 10-bit (minus8 == 2) luma */
	if (!(sps->bit_depth_luma_minus8 == 0 ||
	      sps->bit_depth_luma_minus8 == 2))
		return 0;
	/* Chroma depth must match luma depth */
	if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
		return 0;
	/* 4:2:0 chroma only */
	if (sps->chroma_format_idc != 1)
		return 0;

	/* Limits from H.265 7.4.3.2.1 */
	if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
		return 0;
	if (sps->sps_max_dec_pic_buffering_minus1 > 15)
		return 0;
	if (sps->sps_max_num_reorder_pics >
	    sps->sps_max_dec_pic_buffering_minus1)
		return 0;
	if (ctb_log2_size_y > 6)
		return 0;
	if (max_tb_log2_size_y > 5)
		return 0;
	if (max_tb_log2_size_y > ctb_log2_size_y)
		return 0;
	if (sps->max_transform_hierarchy_depth_inter >
	    (ctb_log2_size_y - min_tb_log2_size_y))
		return 0;
	if (sps->max_transform_hierarchy_depth_intra >
	    (ctb_log2_size_y - min_tb_log2_size_y))
		return 0;
	/* Check pcm stuff */
	if (sps->num_short_term_ref_pic_sets > 64)
		return 0;
	if (sps->num_long_term_ref_pics_sps > 32)
		return 0;
	return 1;
}
4248 +
4249 +static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps,
4250 + const int index)
4251 +{
4252 + u32 pf = 0;
4253 +
4254 + if (!is_sps_set(sps) || !hevc_d_hevc_validate_sps(sps)) {
4255 + /* Treat this as an error? For now return both */
4256 + if (index == 0)
4257 + pf = V4L2_PIX_FMT_NV12MT_COL128;
4258 + else if (index == 1)
4259 + pf = V4L2_PIX_FMT_NV12MT_10_COL128;
4260 + } else if (index == 0) {
4261 + if (sps->bit_depth_luma_minus8 == 0)
4262 + pf = V4L2_PIX_FMT_NV12MT_COL128;
4263 + else if (sps->bit_depth_luma_minus8 == 2)
4264 + pf = V4L2_PIX_FMT_NV12MT_10_COL128;
4265 + }
4266 +
4267 + return pf;
4268 +}
4269 +
4270 +static void copy_color(struct v4l2_pix_format_mplane *d,
4271 + const struct v4l2_pix_format_mplane *s)
4272 +{
4273 + d->colorspace = s->colorspace;
4274 + d->xfer_func = s->xfer_func;
4275 + d->ycbcr_enc = s->ycbcr_enc;
4276 + d->quantization = s->quantization;
4277 +}
4278 +
4279 +static struct v4l2_pix_format_mplane
4280 +hevc_d_hevc_default_dst_fmt(struct hevc_d_ctx * const ctx)
4281 +{
4282 + const struct v4l2_ctrl_hevc_sps * const sps =
4283 + hevc_d_find_control_data(ctx, V4L2_CID_STATELESS_HEVC_SPS);
4284 + struct v4l2_pix_format_mplane pix_fmt;
4285 +
4286 + memset(&pix_fmt, 0, sizeof(pix_fmt));
4287 + if (is_sps_set(sps)) {
4288 + pix_fmt.width = sps->pic_width_in_luma_samples;
4289 + pix_fmt.height = sps->pic_height_in_luma_samples;
4290 + pix_fmt.pixelformat = pixelformat_from_sps(sps, 0);
4291 + }
4292 +
4293 + hevc_d_prepare_dst_format(&pix_fmt);
4294 + copy_color(&pix_fmt, &ctx->src_fmt);
4295 +
4296 + return pix_fmt;
4297 +}
4298 +
4299 +static u32 hevc_d_hevc_get_dst_pixelformat(struct hevc_d_ctx * const ctx,
4300 + const int index)
4301 +{
4302 + const struct v4l2_ctrl_hevc_sps * const sps =
4303 + hevc_d_find_control_data(ctx, V4L2_CID_STATELESS_HEVC_SPS);
4304 +
4305 + return pixelformat_from_sps(sps, index);
4306 +}
4307 +
4308 +static int hevc_d_enum_fmt_vid_cap(struct file *file, void *priv,
4309 + struct v4l2_fmtdesc *f)
4310 +{
4311 + struct hevc_d_ctx * const ctx = hevc_d_file2ctx(file);
4312 +
4313 + const u32 pf = hevc_d_hevc_get_dst_pixelformat(ctx, f->index);
4314 +
4315 + if (pf == 0)
4316 + return -EINVAL;
4317 +
4318 + f->pixelformat = pf;
4319 + return 0;
4320 +}
4321 +
4322 +/*
4323 + * get dst format - sets it to default if otherwise unset
4324 + * returns a pointer to the struct as a convienience
4325 + */
4326 +static struct v4l2_pix_format_mplane *get_dst_fmt(struct hevc_d_ctx *const ctx)
4327 +{
4328 + if (!ctx->dst_fmt_set)
4329 + ctx->dst_fmt = hevc_d_hevc_default_dst_fmt(ctx);
4330 + return &ctx->dst_fmt;
4331 +}
4332 +
4333 +static int hevc_d_g_fmt_vid_cap(struct file *file, void *priv,
4334 + struct v4l2_format *f)
4335 +{
4336 + struct hevc_d_ctx *ctx = hevc_d_file2ctx(file);
4337 +
4338 + f->fmt.pix_mp = *get_dst_fmt(ctx);
4339 + return 0;
4340 +}
4341 +
4342 +static int hevc_d_g_fmt_vid_out(struct file *file, void *priv,
4343 + struct v4l2_format *f)
4344 +{
4345 + struct hevc_d_ctx *ctx = hevc_d_file2ctx(file);
4346 +
4347 + f->fmt.pix_mp = ctx->src_fmt;
4348 + return 0;
4349 +}
4350 +
4351 +static int hevc_d_try_fmt_vid_cap(struct file *file, void *priv,
4352 + struct v4l2_format *f)
4353 +{
4354 + struct hevc_d_ctx *ctx = hevc_d_file2ctx(file);
4355 + const struct v4l2_ctrl_hevc_sps * const sps =
4356 + hevc_d_find_control_data(ctx, V4L2_CID_STATELESS_HEVC_SPS);
4357 + u32 pixelformat;
4358 + int i;
4359 +
4360 + for (i = 0; (pixelformat = pixelformat_from_sps(sps, i)) != 0; i++) {
4361 + if (f->fmt.pix_mp.pixelformat == pixelformat)
4362 + break;
4363 + }
4364 +
4365 + /*
4366 + * We don't have any way of finding out colourspace so believe
4367 + * anything we are told - take anything set in src as a default
4368 + */
4369 + if (f->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT)
4370 + copy_color(&f->fmt.pix_mp, &ctx->src_fmt);
4371 +
4372 + f->fmt.pix_mp.pixelformat = pixelformat;
4373 + hevc_d_prepare_dst_format(&f->fmt.pix_mp);
4374 + return 0;
4375 +}
4376 +
/* VIDIOC_TRY_FMT (output): coerce the request into a valid slice format. */
static int hevc_d_try_fmt_vid_out(struct file *file, void *priv,
				  struct v4l2_format *f)
{
	hevc_d_prepare_src_format(&f->fmt.pix_mp);
	return 0;
}
4383 +
4384 +static int hevc_d_s_fmt_vid_cap(struct file *file, void *priv,
4385 + struct v4l2_format *f)
4386 +{
4387 + struct hevc_d_ctx *ctx = hevc_d_file2ctx(file);
4388 + struct vb2_queue *vq;
4389 + int ret;
4390 +
4391 + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4392 + if (vb2_is_busy(vq))
4393 + return -EBUSY;
4394 +
4395 + ret = hevc_d_try_fmt_vid_cap(file, priv, f);
4396 + if (ret)
4397 + return ret;
4398 +
4399 + ctx->dst_fmt = f->fmt.pix_mp;
4400 + ctx->dst_fmt_set = 1;
4401 +
4402 + return 0;
4403 +}
4404 +
4405 +static int hevc_d_s_fmt_vid_out(struct file *file, void *priv,
4406 + struct v4l2_format *f)
4407 +{
4408 + struct hevc_d_ctx *ctx = hevc_d_file2ctx(file);
4409 + struct vb2_queue *vq;
4410 + int ret;
4411 +
4412 + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
4413 + if (vb2_is_busy(vq))
4414 + return -EBUSY;
4415 +
4416 + ret = hevc_d_try_fmt_vid_out(file, priv, f);
4417 + if (ret)
4418 + return ret;
4419 +
4420 + ctx->src_fmt = f->fmt.pix_mp;
4421 + ctx->dst_fmt_set = 0; /* Setting src invalidates dst */
4422 +
4423 + /* Propagate colorspace information to capture. */
4424 + copy_color(&ctx->dst_fmt, &f->fmt.pix_mp);
4425 + return 0;
4426 +}
4427 +
/*
 * V4L2 ioctl dispatch table.  Format handling is driver-specific;
 * buffer, streaming, request and event handling is delegated to the
 * mem2mem and control-framework helpers.
 */
const struct v4l2_ioctl_ops hevc_d_ioctl_ops = {
	.vidioc_querycap		= hevc_d_querycap,

	/* Capture (decoded frame) queue */
	.vidioc_enum_fmt_vid_cap	= hevc_d_enum_fmt_vid_cap,
	.vidioc_g_fmt_vid_cap_mplane	= hevc_d_g_fmt_vid_cap,
	.vidioc_try_fmt_vid_cap_mplane	= hevc_d_try_fmt_vid_cap,
	.vidioc_s_fmt_vid_cap_mplane	= hevc_d_s_fmt_vid_cap,

	/* Output (bitstream) queue */
	.vidioc_enum_fmt_vid_out	= hevc_d_enum_fmt_vid_out,
	.vidioc_g_fmt_vid_out_mplane	= hevc_d_g_fmt_vid_out,
	.vidioc_try_fmt_vid_out_mplane	= hevc_d_try_fmt_vid_out,
	.vidioc_s_fmt_vid_out_mplane	= hevc_d_s_fmt_vid_out,

	/* Buffer handling: standard mem2mem helpers */
	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
	.vidioc_prepare_buf		= v4l2_m2m_ioctl_prepare_buf,
	.vidioc_create_bufs		= v4l2_m2m_ioctl_create_bufs,
	.vidioc_expbuf			= v4l2_m2m_ioctl_expbuf,

	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,

	/* Stateless decoder command handling */
	.vidioc_try_decoder_cmd		= v4l2_m2m_ioctl_stateless_try_decoder_cmd,
	.vidioc_decoder_cmd		= v4l2_m2m_ioctl_stateless_decoder_cmd,

	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
};
4458 +
4459 +static int hevc_d_queue_setup(struct vb2_queue *vq, unsigned int *nbufs,
4460 + unsigned int *nplanes, unsigned int sizes[],
4461 + struct device *alloc_devs[])
4462 +{
4463 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq);
4464 + struct v4l2_pix_format_mplane *pix_fmt;
4465 + int expected_nplanes;
4466 +
4467 + if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
4468 + pix_fmt = &ctx->src_fmt;
4469 + expected_nplanes = 1;
4470 + } else {
4471 + pix_fmt = get_dst_fmt(ctx);
4472 + expected_nplanes = 2;
4473 + }
4474 +
4475 + if (*nplanes) {
4476 + if (*nplanes != expected_nplanes ||
4477 + sizes[0] < pix_fmt->plane_fmt[0].sizeimage ||
4478 + sizes[1] < pix_fmt->plane_fmt[1].sizeimage)
4479 + return -EINVAL;
4480 + } else {
4481 + sizes[0] = pix_fmt->plane_fmt[0].sizeimage;
4482 + if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
4483 + *nplanes = 1;
4484 + } else {
4485 + sizes[1] = pix_fmt->plane_fmt[1].sizeimage;
4486 + *nplanes = 2;
4487 + }
4488 + }
4489 +
4490 + return 0;
4491 +}
4492 +
4493 +static void hevc_d_queue_cleanup(struct vb2_queue *vq, u32 state)
4494 +{
4495 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq);
4496 + struct vb2_v4l2_buffer *vbuf;
4497 +
4498 + for (;;) {
4499 + if (V4L2_TYPE_IS_OUTPUT(vq->type))
4500 + vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
4501 + else
4502 + vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
4503 +
4504 + if (!vbuf)
4505 + return;
4506 +
4507 + v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
4508 + &ctx->hdl);
4509 + v4l2_m2m_buf_done(vbuf, state);
4510 + }
4511 +}
4512 +
4513 +static int hevc_d_buf_out_validate(struct vb2_buffer *vb)
4514 +{
4515 + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
4516 +
4517 + vbuf->field = V4L2_FIELD_NONE;
4518 + return 0;
4519 +}
4520 +
4521 +static int hevc_d_buf_prepare(struct vb2_buffer *vb)
4522 +{
4523 + struct vb2_queue *vq = vb->vb2_queue;
4524 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq);
4525 + struct v4l2_pix_format_mplane *pix_fmt;
4526 +
4527 + if (V4L2_TYPE_IS_OUTPUT(vq->type))
4528 + pix_fmt = &ctx->src_fmt;
4529 + else
4530 + pix_fmt = &ctx->dst_fmt;
4531 +
4532 + if (vb2_plane_size(vb, 0) < pix_fmt->plane_fmt[0].sizeimage ||
4533 + vb2_plane_size(vb, 1) < pix_fmt->plane_fmt[1].sizeimage)
4534 + return -EINVAL;
4535 +
4536 + vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
4537 + vb2_set_plane_payload(vb, 1, pix_fmt->plane_fmt[1].sizeimage);
4538 +
4539 + return 0;
4540 +}
4541 +
4542 +/* Only stops the clock if streaom off on both output & capture */
4543 +static void stop_clock(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx)
4544 +{
4545 + if (ctx->src_stream_on ||
4546 + ctx->dst_stream_on)
4547 + return;
4548 +
4549 + clk_set_min_rate(dev->clock, 0);
4550 + clk_disable_unprepare(dev->clock);
4551 +}
4552 +
4553 +/* Always starts the clock if it isn't already on this ctx */
4554 +static int start_clock(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx)
4555 +{
4556 + int rv;
4557 +
4558 + rv = clk_set_min_rate(dev->clock, dev->max_clock_rate);
4559 + if (rv) {
4560 + dev_err(dev->dev, "Failed to set clock rate\n");
4561 + return rv;
4562 + }
4563 +
4564 + rv = clk_prepare_enable(dev->clock);
4565 + if (rv) {
4566 + dev_err(dev->dev, "Failed to enable clock\n");
4567 + return rv;
4568 + }
4569 +
4570 + return 0;
4571 +}
4572 +
4573 +static int hevc_d_start_streaming(struct vb2_queue *vq, unsigned int count)
4574 +{
4575 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq);
4576 + struct hevc_d_dev *dev = ctx->dev;
4577 + int ret = 0;
4578 +
4579 + if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
4580 + ctx->dst_stream_on = 1;
4581 + goto ok;
4582 + }
4583 +
4584 + if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE) {
4585 + ret = -EINVAL;
4586 + goto fail_cleanup;
4587 + }
4588 +
4589 + if (ctx->src_stream_on)
4590 + goto ok;
4591 +
4592 + ret = start_clock(dev, ctx);
4593 + if (ret)
4594 + goto fail_cleanup;
4595 +
4596 + ret = hevc_d_h265_start(ctx);
4597 + if (ret)
4598 + goto fail_stop_clock;
4599 +
4600 + ctx->src_stream_on = 1;
4601 +ok:
4602 + return 0;
4603 +
4604 +fail_stop_clock:
4605 + stop_clock(dev, ctx);
4606 +fail_cleanup:
4607 + v4l2_err(&dev->v4l2_dev, "%s: qtype=%d: FAIL\n", __func__, vq->type);
4608 + hevc_d_queue_cleanup(vq, VB2_BUF_STATE_QUEUED);
4609 + return ret;
4610 +}
4611 +
4612 +static void hevc_d_stop_streaming(struct vb2_queue *vq)
4613 +{
4614 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq);
4615 + struct hevc_d_dev *dev = ctx->dev;
4616 +
4617 + if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
4618 + ctx->src_stream_on = 0;
4619 + hevc_d_h265_stop(ctx);
4620 + } else {
4621 + ctx->dst_stream_on = 0;
4622 + }
4623 +
4624 + hevc_d_queue_cleanup(vq, VB2_BUF_STATE_ERROR);
4625 +
4626 + vb2_wait_for_all_buffers(vq);
4627 +
4628 + stop_clock(dev, ctx);
4629 +}
4630 +
4631 +static void hevc_d_buf_queue(struct vb2_buffer *vb)
4632 +{
4633 + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
4634 + struct hevc_d_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
4635 +
4636 + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
4637 +}
4638 +
/* vb2 buf_request_complete: complete the buffer's request controls. */
static void hevc_d_buf_request_complete(struct vb2_buffer *vb)
{
	struct hevc_d_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);

	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
}
4645 +
/* vb2 queue operations shared by the source and destination queues */
static const struct vb2_ops hevc_d_qops = {
	.queue_setup		= hevc_d_queue_setup,
	.buf_prepare		= hevc_d_buf_prepare,
	.buf_queue		= hevc_d_buf_queue,
	.buf_out_validate	= hevc_d_buf_out_validate,
	.buf_request_complete	= hevc_d_buf_request_complete,
	.start_streaming	= hevc_d_start_streaming,
	.stop_streaming		= hevc_d_stop_streaming,
	/* Standard helpers that drop/retake the queue lock while waiting */
	.wait_prepare		= vb2_ops_wait_prepare,
	.wait_finish		= vb2_ops_wait_finish,
};
4657 +
4658 +int hevc_d_queue_init(void *priv, struct vb2_queue *src_vq,
4659 + struct vb2_queue *dst_vq)
4660 +{
4661 + struct hevc_d_ctx *ctx = priv;
4662 + int ret;
4663 +
4664 + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
4665 + src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4666 + src_vq->drv_priv = ctx;
4667 + src_vq->buf_struct_size = sizeof(struct hevc_d_buffer);
4668 + src_vq->ops = &hevc_d_qops;
4669 + src_vq->mem_ops = &vb2_dma_contig_memops;
4670 + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4671 + src_vq->lock = &ctx->ctx_mutex;
4672 + src_vq->dev = ctx->dev->dev;
4673 + src_vq->supports_requests = true;
4674 + src_vq->requires_requests = true;
4675 +
4676 + ret = vb2_queue_init(src_vq);
4677 + if (ret)
4678 + return ret;
4679 +
4680 + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
4681 + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
4682 + dst_vq->drv_priv = ctx;
4683 + dst_vq->buf_struct_size = sizeof(struct hevc_d_buffer);
4684 + dst_vq->min_queued_buffers = 1;
4685 + dst_vq->ops = &hevc_d_qops;
4686 + dst_vq->mem_ops = &vb2_dma_contig_memops;
4687 + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
4688 + dst_vq->lock = &ctx->ctx_mutex;
4689 + dst_vq->dev = ctx->dev->dev;
4690 +
4691 + return vb2_queue_init(dst_vq);
4692 +}
4693 --- /dev/null
4694 +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h
4695 @@ -0,0 +1,38 @@
4696 +/* SPDX-License-Identifier: GPL-2.0 */
4697 +/*
4698 + * Raspberry Pi HEVC driver
4699 + *
4700 + * Copyright (C) 2024 Raspberry Pi Ltd
4701 + *
4702 + * Based on the Cedrus VPU driver, that is:
4703 + *
4704 + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
4705 + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
4706 + * Copyright (C) 2018 Bootlin
4707 + */
4708 +
4709 +#ifndef _HEVC_D_VIDEO_H_
4710 +#define _HEVC_D_VIDEO_H_
4711 +
/* Description of a pixel format handled by the driver */
struct hevc_d_format {
	/* V4L2 fourcc */
	u32 pixelformat;
	/* Queue direction mask — presumably HEVC_D_DECODE_SRC/DST; confirm */
	u32 directions;
	unsigned int capabilities;
};
4717 +
4718 +static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps)
4719 +{
4720 + return sps && sps->pic_width_in_luma_samples;
4721 +}
4722 +
4723 +extern const struct v4l2_ioctl_ops hevc_d_ioctl_ops;
4724 +
4725 +int hevc_d_queue_init(void *priv, struct vb2_queue *src_vq,
4726 + struct vb2_queue *dst_vq);
4727 +
4728 +size_t hevc_d_bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8);
4729 +size_t hevc_d_round_up_size(const size_t x);
4730 +
4731 +void hevc_d_prepare_src_format(struct v4l2_pix_format_mplane *pix_fmt);
4732 +
4733 +#endif